Data Preparation

Author

Julius Fenn

Published

August 18, 2025

Background Information

This is an R Markdown document. Instructions for writing these documents and background information can be found in the book written by Xie, Allaire, and Grolemund (2018). When you execute code within the document, the results appear beneath the code. This file contains summary statistics, respectively the analysis step (confirmatory and exploratory analyses). The project is split into multiple subfiles, such as data processing and data analysis steps, which follows the classical data-analysis pipeline (see Peng and Matsui 2016; Wickham and Grolemund 2017).

Global variables

# Global switch: when TRUE, the drawn CAMs are exported as .png and .json
# files in the "save CAMs" step further below.
save_CAMs_as_pictures <- TRUE
# consider_Protocol = FALSE # not needed at current stage

create raw data files

# sets the directory of location of this script as the current directory
# setwd(dirname(rstudioapi::getSourceEditorContext()$path))

### load packages
# library() stops with a clear error when pacman is not installed; require()
# would only return FALSE and let p_load() fail with "could not find function".
library(pacman)
p_load('tidyverse', 'jsonlite', 'magrittr',
       'stargazer', 'psych', 'jtools', 'DT', 'igraph',
       'writexl')


### list data files
# All following relative paths are resolved from the "data" folder.
setwd("data")
# every entry of the data folder whose name starts with "study_result"
folders <- list.files(pattern = "^study_result.*")

### create data files - GERMANY
# Each raw file is first (re)created containing one empty line and is then
# opened in append mode, so that the loop below can add one JSON document per
# line. The empty first line is discarded again when the files are read.
# get CAM data
writeLines("", "CAMdata.txt") # create file
text_connection <- file("CAMdata.txt", "a") # open connection to append


# get pre CAM data
writeLines("", "preCAM.txt") # create file
text_connection_pre <- file("preCAM.txt", "a") # open connection to append

# get post CAM data
writeLines("", "postCAM.txt") # create file
text_connection_post <- file("postCAM.txt", "a") # open connection to append

# Append the three components of every complete study result (pre CAM
# questionnaire, CAM, post CAM questionnaire) to the raw data files, one JSON
# document per line.
# seq_along() keeps the loop from running when no result folder was found
# (1:length(folders) would iterate over c(1, 0)).
for(i in seq_along(folders)){
  setwd(folders[i])
  # only study results with exactly three entries are processed
  if(length(dir()) == 3){
    # print(i)
    # print(dir())

    # pre CAM data
    # NOTE(review): the components are addressed by their position in dir();
    # presumably the alphabetical order is pre CAM, CAM, post CAM - confirm.
    setwd(dir()[1])
    tmp <- jsonlite::fromJSON(txt = "data.txt")
    tmp_id <- tmp$ID[!is.na(tmp$ID)]
    # The comparison and the assignment below need exactly one participant ID;
    # fail early with a message that names the affected folder.
    if(length(tmp_id) != 1){
      stop("expected exactly one non-missing ID in the pre CAM data of ",
           folders[i], ", found ", length(tmp_id), call. = FALSE)
    }
    writeLines(jsonlite::toJSON(x = tmp), text_connection_pre)
    setwd("..")


    # CAM data
    setwd(dir()[2])
    tmp <- jsonlite::fromJSON(txt = "data.txt")
    # isTRUE() also covers a missing or NA creator, which would otherwise make
    # the if() condition itself fail
    if(!isTRUE(tmp$creator == tmp_id)){
      warning("IDs (primary keys) not matching")
    }
    writeLines(jsonlite::toJSON(x = tmp), text_connection)
    setwd("..")

    # post CAM data
    setwd(dir()[3])
    tmp <- jsonlite::fromJSON(txt = "data.txt")
    # carry the participant ID of the pre CAM data over to the post CAM data
    # (stored in the second row only)
    tmp$ID <- NA
    tmp$ID[2] <- tmp_id
    writeLines(jsonlite::toJSON(x = tmp), text_connection_post)
    setwd("..")
  }
  setwd("..")
}

# close the three append connections (CAM, pre CAM, post CAM)
invisible(lapply(
  list(text_connection, text_connection_pre, text_connection_post),
  close
))

### move files to output folder
# copy files (not overwritten)
# NOTE(review): file.copy() is called without overwrite = TRUE, so it returns
# FALSE and keeps the old file whenever the target already exists in
# "outputs" (the printed results of the three copies are FALSE). The freshly
# written source files are removed further below regardless - confirm that
# keeping the previously stored raw data is intended.
tmp_file_from <-  getwd()
setwd("../outputs")
file.copy(from =  paste0(tmp_file_from, "/CAMdata.txt"), to = paste0(getwd(), "/CAMdata.txt"))

[1] FALSE

file.copy(from =  paste0(tmp_file_from, "/preCAM.txt"), to = paste0(getwd(), "/preCAM.txt"))

[1] FALSE

file.copy(from =  paste0(tmp_file_from, "/postCAM.txt"), to = paste0(getwd(), "/postCAM.txt"))

[1] FALSE

# remove files
# NOTE(review): the source file is deleted unconditionally, i.e. also when the
# preceding file.copy() returned FALSE - in that case the newly collected raw
# data is lost and the older file in "outputs" is used. Confirm intent.
file.remove(paste0(tmp_file_from, "/CAMdata.txt"))

[1] TRUE

file.remove(paste0(tmp_file_from, "/preCAM.txt"))

[1] TRUE

file.remove(paste0(tmp_file_from, "/postCAM.txt"))

[1] TRUE

### load functions
# print(getwd())
# Source every file of the two helper folders. Iterating over the file names
# directly (instead of 1:length(dir())) skips an empty folder rather than
# calling source(NA).
setwd("../functions")
for(i in dir()){
  # print(i)
  source(i, encoding = "utf-8")
}


setwd("../functions_CAMapp")
for(i in dir()){
  # print(i)
  source(i, encoding = "utf-8")
}


### remove temporary objects
# Only objects that exist are removed; a plain rm() warns for names that are
# not created in the visible part of this script (e.g. prolific1,
# text_connection_second).
tmp_obsolete <- c("i", "tmp", "tmp_id", "folders", "tmp_file_from",
                  "prolific1", "prolific2",
                  "text_connection", "text_connection_post",
                  "text_connection_postSecond", "text_connection_pre",
                  "text_connection_second")
rm(list = c(intersect(tmp_obsolete, ls()), "tmp_obsolete"))

set up data.frame questionnaires

load data:

setwd("outputs")

# Both raw files hold one JSON document per line: read the file, split it into
# lines, drop empty lines (including bare carriage returns) and row-bind the
# parsed JSON of all remaining lines into one data.frame.

# > pre study
dat_preCAM <- suppressMessages(
  read_file("preCAM.txt") %>%
    str_split("\n") %>%
    first() %>%
    discard(function(line) line == "") %>%
    discard(function(line) line == "\r") %>%
    map_dfr(fromJSON, flatten = TRUE)
)

# > post first CAM
dat_postCAM <- suppressMessages(
  read_file("postCAM.txt") %>%
    str_split("\n") %>%
    first() %>%
    discard(function(line) line == "") %>%
    discard(function(line) line == "\r") %>%
    map_dfr(fromJSON, flatten = TRUE)
)

rename ID variable for data sets to avoid errors:

# The participant identifier column "ID" becomes "PROLIFIC_PID" in both data
# sets, which frees the name "ID" for the row counter created in the next step.
colnames(dat_preCAM) <- replace(colnames(dat_preCAM),
                                colnames(dat_preCAM) == "ID", "PROLIFIC_PID")
colnames(dat_postCAM) <- replace(colnames(dat_postCAM),
                                 colnames(dat_postCAM) == "ID", "PROLIFIC_PID")

add unique ID variable to match rows to participants, only keep complete data sets:

### create counter variable for both data sets
# A new participant starts with every row whose sender equals the marker
# screen; the running count of these rows is used as participant counter.
# cumsum() replaces the former row-wise loops, which failed for a data set
# without rows (1:nrow() iterates over c(1, 0)); rows with a missing sender
# never start a new participant. as.numeric() keeps the double type that the
# loops produced.

# pre study
dat_preCAM$ID <- as.numeric(cumsum(
  !is.na(dat_preCAM$sender) & dat_preCAM$sender == "Greetings"
))

# post study
dat_postCAM$ID <- as.numeric(cumsum(
  !is.na(dat_postCAM$sender) & dat_postCAM$sender == "break500ms_2"
))


### keep only complete data sets
# pre-study
# sort(table(dat_preCAM$ID))
sum(table(dat_preCAM$ID) != max(table(dat_preCAM$ID)))

[1] 0

sum(table(dat_preCAM$ID) == max(table(dat_preCAM$ID)))

[1] 18

# keep only the participants whose number of rows equals the largest number of
# rows observed for any participant
dat_preCAM <- local({
  rows_per_id <- table(dat_preCAM$ID)
  complete_ids <- names(rows_per_id)[rows_per_id == max(rows_per_id)]
  dat_preCAM[dat_preCAM$ID %in% complete_ids,]
})

# post-study
# sort(table(dat_postCAM$ID))
sum(table(dat_postCAM$ID) != max(table(dat_postCAM$ID)))

[1] 0

sum(table(dat_postCAM$ID) == max(table(dat_postCAM$ID)))

[1] 18

# keep only the participants whose number of rows equals the largest number of
# rows observed for any participant
dat_postCAM <- local({
  rows_per_id <- table(dat_postCAM$ID)
  complete_ids <- names(rows_per_id)[rows_per_id == max(rows_per_id)]
  dat_postCAM[dat_postCAM$ID %in% complete_ids,]
})


all(unique(dat_preCAM$ID) %in% unique(dat_postCAM$ID))

[1] TRUE

questionnaire pre-study (component 1)

colnames(dat_preCAM)

 [1] "sender"                  "sender_type"            
 [3] "sender_id"               "6"                      
 [5] "ended_on"                "duration"               
 [7] "time_run"                "time_render"            
 [9] "time_show"               "time_end"               
[11] "time_commit"             "timestamp"              
[13] "time_switch"             "IDtype"                 
[15] "PROLIFIC_PID"            "dummy_informedconsent"  
[17] "commCheck"               "meta.labjs_version"     
[19] "meta.location"           "meta.userAgent"         
[21] "meta.platform"           "meta.language"          
[23] "meta.locale"             "meta.timeZone"          
[25] "meta.timezoneOffset"     "meta.screen_width"      
[27] "meta.screen_height"      "meta.scroll_width"      
[29] "meta.scroll_height"      "meta.window_innerWidth" 
[31] "meta.window_innerHeight" "meta.devicePixelRatio"  
[33] "meta.labjs_build.flavor" "meta.labjs_build.commit"
[35] "para_defocuscount"       "7"                      
[37] "ID"

# columns starting with "meta" or "R", without those containing "labjs" or
# "location"
tmp_notNumeric <- colnames(dat_preCAM) %>%
  str_subset(pattern = "^meta|^R") %>%
  str_subset(pattern = "labjs|location", negate = TRUE)

# variables to extract from the pre-study data
vec_ques <- c(
  "PROLIFIC_PID", "IDtype",
  "dummy_informedconsent", "commCheck",
  tmp_notNumeric
)

# variables handed to questionnairetype() as not numeric
vec_notNumeric <- c("PROLIFIC_PID", "IDtype", tmp_notNumeric)

questionnaire_c1 <- questionnairetype(
  dataset = dat_preCAM,
  listvars = vec_ques,
  notNumeric = vec_notNumeric,
  verbose = FALSE
)


dim(questionnaire_c1)

[1] 18 18

colnames(questionnaire_c1)

 [1] "ID"                      "PROLIFIC_PID"           
 [3] "IDtype"                  "dummy_informedconsent"  
 [5] "commCheck"               "meta.userAgent"         
 [7] "meta.platform"           "meta.language"          
 [9] "meta.locale"             "meta.timeZone"          
[11] "meta.timezoneOffset"     "meta.screen_width"      
[13] "meta.screen_height"      "meta.scroll_width"      
[15] "meta.scroll_height"      "meta.window_innerWidth" 
[17] "meta.window_innerHeight" "meta.devicePixelRatio"

questionnaire post-CAM (component 3)

colnames(dat_postCAM)

 [1] "sender"                      "sender_type"                
 [3] "sender_id"                   "ended_on"                   
 [5] "duration"                    "time_run"                   
 [7] "time_render"                 "time_show"                  
 [9] "time_end"                    "time_commit"                
[11] "timestamp"                   "time_switch"                
[13] "changeCAM_valence"           "14"                         
[15] "PROLIFIC_PID"                "feedCAM_repres"             
[17] "feedCAM_technicalprobs"      "feedCAM_technicalprobsText" 
[19] "feedCAM_already"             "feedCAM_alreadyText"        
[21] "para_countclicks"            "PGD-10"                     
[23] "PGD-9"                       "PGD-3"                      
[25] "PGD-6"                       "PGD-4"                      
[27] "PGD-7"                       "PGD-11"                     
[29] "PGD-5"                       "PGD-2"                      
[31] "PGD-8"                       "PGD-12"                     
[33] "PGD-1"                       "CSJAS-item1"                
[35] "CSJAS-item2"                 "CSJAS-item10r"              
[37] "CSJAS-item6r"                "CSJAS-item4r"               
[39] "CSJAS-item8"                 "CSJAS-item7r"               
[41] "CSJAS-item3r"                "CSJAS-item9"                
[43] "CSJAS-item5"                 "CCDisImpair-anxiety5r"      
[45] "CCDisImpair-anger3r"         "CCDisImpair-impairment2r"   
[47] "CCDisImpair-anxiety1"        "CCDisImpair-impairment4"    
[49] "CCDisImpair-anger2"          "CCDisImpair-anxiety2r"      
[51] "CCDisImpair-impairment8"     "CCDisImpair-anger5r"        
[53] "CCDisImpair-anxiety3"        "CCDisImpair-sadness1r"      
[55] "CCDisImpair-sadness3"        "CCDisImpair-impairment7"    
[57] "CCDisImpair-anxiety4"        "CCDisImpair-sadness4"       
[59] "CCDisImpair-impairment6r"    "CCDisImpair-sadness5"       
[61] "CCDisImpair-impairment3"     "CCDisImpair-sadness2"       
[63] "CCDisImpair-anger4r"         "CCDisImpair-impairment1"    
[65] "CCDisImpair-impairment5r"    "CCDisImpair-anger1"         
[67] "guttman-item"                "not_needed"                 
[69] "guttman-response"            "PtMAtGD-8"                  
[71] "PtMAtGD-2"                   "PtMAtGD-15"                 
[73] "PtMAtGD-14"                  "PtMAtGD-11"                 
[75] "PtMAtGD-3"                   "PtMAtGD-7"                  
[77] "PtMAtGD-6"                   "PtMAtGD-10"                 
[79] "PtMAtGD-1"                   "PtMAtGD-9"                  
[81] "PtMAtGD-4"                   "PtMAtGD-5"                  
[83] "PtMAtGD-13"                  "PtMAtGD-12"                 
[85] "sociodemo_age"               "sociodemo_gender"           
[87] "sociodemo_sexualOrientation" "sociodemo_residency"        
[89] "lrscale"                     "rlgdgr"                     
[91] "feedback_critic"             "para_defocuscount"          
[93] "15"                          "ID"

# Variables to extract from the post-CAM data; the items of each scale are
# appended in alphabetical order of their column names.
# NOTE(review): the "CSJAS-item*" columns contained in colnames(dat_postCAM)
# are not selected here - confirm that this scale is left out on purpose.
vec_ques <- c(
  "PROLIFIC_PID",
  "feedCAM_repres",
  "feedCAM_technicalprobs", "feedCAM_technicalprobsText",
  "feedCAM_already", "feedCAM_alreadyText",
  "changeCAM_valence",
  colnames(dat_postCAM) %>% str_subset(pattern = "^PGD") %>% sort(),
  colnames(dat_postCAM) %>% str_subset(pattern = "^CCDisImpair") %>% sort(),
  colnames(dat_postCAM) %>% str_subset(pattern = "^PtMAtGD") %>% sort(),
  "sociodemo_age", "sociodemo_gender",
  "sociodemo_sexualOrientation", "sociodemo_residency",
  "lrscale", "rlgdgr",
  "feedback_critic"
)


# Variables handed to questionnairetype() as not numeric.
# NOTE(review): "scenario_thoughts" is neither part of vec_ques nor of the
# printed column names of dat_postCAM - presumably a leftover; verify.
vec_notNumeric <- c(
  "PROLIFIC_PID",
  "feedCAM_technicalprobsText", "feedCAM_alreadyText", "scenario_thoughts",
  "changeCAM_valence",
  "sociodemo_gender", "sociodemo_sexualOrientation", "sociodemo_residency",
  "feedback_critic"
)

questionnaire_c3 <- questionnairetype(
  dataset = dat_postCAM,
  listvars = vec_ques,
  notNumeric = vec_notNumeric,
  verbose = FALSE
)

dim(questionnaire_c3)

[1] 18 65

colnames(questionnaire_c3)

 [1] "ID"                          "PROLIFIC_PID"               
 [3] "feedCAM_repres"              "feedCAM_technicalprobs"     
 [5] "feedCAM_technicalprobsText"  "feedCAM_already"            
 [7] "feedCAM_alreadyText"         "changeCAM_valence"          
 [9] "PGD-1"                       "PGD-10"                     
[11] "PGD-11"                      "PGD-12"                     
[13] "PGD-2"                       "PGD-3"                      
[15] "PGD-4"                       "PGD-5"                      
[17] "PGD-6"                       "PGD-7"                      
[19] "PGD-8"                       "PGD-9"                      
[21] "CCDisImpair-anger1"          "CCDisImpair-anger2"         
[23] "CCDisImpair-anger3r"         "CCDisImpair-anger4r"        
[25] "CCDisImpair-anger5r"         "CCDisImpair-anxiety1"       
[27] "CCDisImpair-anxiety2r"       "CCDisImpair-anxiety3"       
[29] "CCDisImpair-anxiety4"        "CCDisImpair-anxiety5r"      
[31] "CCDisImpair-impairment1"     "CCDisImpair-impairment2r"   
[33] "CCDisImpair-impairment3"     "CCDisImpair-impairment4"    
[35] "CCDisImpair-impairment5r"    "CCDisImpair-impairment6r"   
[37] "CCDisImpair-impairment7"     "CCDisImpair-impairment8"    
[39] "CCDisImpair-sadness1r"       "CCDisImpair-sadness2"       
[41] "CCDisImpair-sadness3"        "CCDisImpair-sadness4"       
[43] "CCDisImpair-sadness5"        "PtMAtGD-1"                  
[45] "PtMAtGD-10"                  "PtMAtGD-11"                 
[47] "PtMAtGD-12"                  "PtMAtGD-13"                 
[49] "PtMAtGD-14"                  "PtMAtGD-15"                 
[51] "PtMAtGD-2"                   "PtMAtGD-3"                  
[53] "PtMAtGD-4"                   "PtMAtGD-5"                  
[55] "PtMAtGD-6"                   "PtMAtGD-7"                  
[57] "PtMAtGD-8"                   "PtMAtGD-9"                  
[59] "sociodemo_age"               "sociodemo_gender"           
[61] "sociodemo_sexualOrientation" "sociodemo_residency"        
[63] "lrscale"                     "rlgdgr"                     
[65] "feedback_critic"

get ratings of guttman

# Names of all columns starting with "guttman" (item and response)
guttman_cols <- sort(str_subset(string = colnames(dat_postCAM), pattern = "^guttman"))

# Create an empty list to store processed data
result_list <- list()

dat_merged <- dat_postCAM[,  c("PROLIFIC_PID", "ID", "sender", guttman_cols)]
# a response stored as the literal string "NA" for a presented item is
# recoded to "LNA"
dat_merged$`guttman-response`[!is.na(dat_merged$`guttman-item`) & dat_merged$`guttman-response` == "NA"] <- "LNA"

# Loop over each unique ID
for (i in unique(dat_merged$ID)) {
  tmp <- dat_merged[dat_merged$ID == i, c("ID", "sender", guttman_cols)]

  # Fill down ID if missing
  tmp <- tmp %>%
    fill(ID, .direction = "downup")

  # Keep the rows in which at least one guttman column is not NA
  tmp <- tmp %>%
    filter(
      if_any(all_of(guttman_cols), ~ !is.na(.))
    )

  # Rows without a sender are labelled "GI_<item>" using the item of the SAME
  # row; the former version paired the rows without sender with the list of
  # non-missing items, which misaligns as soon as both sets of rows differ.
  tmp$sender[is.na(tmp$sender)] <- paste0("GI_", tmp$`guttman-item`[is.na(tmp$sender)])

  # Append to result list
  result_list[[as.character(i)]] <- tmp
}

# Combine all into one dataframe
questionnaire_rating_long <- bind_rows(result_list)
questionnaire_rating_long$sender <- factor(questionnaire_rating_long$sender)


# wide data set for guttman ratings
colnames(questionnaire_rating_long)[colnames(questionnaire_rating_long) == "guttman-item"] <- "item"
colnames(questionnaire_rating_long)[colnames(questionnaire_rating_long) == "guttman-response"] <- "response"

# one row per participant, one column "response_<item>" per item
questionnaire_rating_wide_guttman <- questionnaire_rating_long %>%
  pivot_wider(
    id_cols = ID,
    names_from = item,
    values_from = response,
    names_glue = "response_{item}"
  )

merge all data sets

# Join the post-CAM questionnaire and the wide guttman ratings to the
# pre-study questionnaire via the participant counter "ID".
merged_data <- questionnaire_c1 %>%
  left_join(questionnaire_c3, by = "ID") %>%
  left_join(questionnaire_rating_wide_guttman, by = "ID")

# PROLIFIC_PID is part of both questionnaires and therefore gets the suffixes
# .x / .y in the join: drop the copy of the post-CAM questionnaire and restore
# the original name for the copy of the pre-study questionnaire.
merged_data <- merged_data %>%
  dplyr::select(-any_of("PROLIFIC_PID.y")) %>%
  dplyr::rename(any_of(c(PROLIFIC_PID = "PROLIFIC_PID.x")))



dim(merged_data)

[1] 18 85

colnames(merged_data)

 [1] "ID"                          "PROLIFIC_PID"               
 [3] "IDtype"                      "dummy_informedconsent"      
 [5] "commCheck"                   "meta.userAgent"             
 [7] "meta.platform"               "meta.language"              
 [9] "meta.locale"                 "meta.timeZone"              
[11] "meta.timezoneOffset"         "meta.screen_width"          
[13] "meta.screen_height"          "meta.scroll_width"          
[15] "meta.scroll_height"          "meta.window_innerWidth"     
[17] "meta.window_innerHeight"     "meta.devicePixelRatio"      
[19] "feedCAM_repres"              "feedCAM_technicalprobs"     
[21] "feedCAM_technicalprobsText"  "feedCAM_already"            
[23] "feedCAM_alreadyText"         "changeCAM_valence"          
[25] "PGD-1"                       "PGD-10"                     
[27] "PGD-11"                      "PGD-12"                     
[29] "PGD-2"                       "PGD-3"                      
[31] "PGD-4"                       "PGD-5"                      
[33] "PGD-6"                       "PGD-7"                      
[35] "PGD-8"                       "PGD-9"                      
[37] "CCDisImpair-anger1"          "CCDisImpair-anger2"         
[39] "CCDisImpair-anger3r"         "CCDisImpair-anger4r"        
[41] "CCDisImpair-anger5r"         "CCDisImpair-anxiety1"       
[43] "CCDisImpair-anxiety2r"       "CCDisImpair-anxiety3"       
[45] "CCDisImpair-anxiety4"        "CCDisImpair-anxiety5r"      
[47] "CCDisImpair-impairment1"     "CCDisImpair-impairment2r"   
[49] "CCDisImpair-impairment3"     "CCDisImpair-impairment4"    
[51] "CCDisImpair-impairment5r"    "CCDisImpair-impairment6r"   
[53] "CCDisImpair-impairment7"     "CCDisImpair-impairment8"    
[55] "CCDisImpair-sadness1r"       "CCDisImpair-sadness2"       
[57] "CCDisImpair-sadness3"        "CCDisImpair-sadness4"       
[59] "CCDisImpair-sadness5"        "PtMAtGD-1"                  
[61] "PtMAtGD-10"                  "PtMAtGD-11"                 
[63] "PtMAtGD-12"                  "PtMAtGD-13"                 
[65] "PtMAtGD-14"                  "PtMAtGD-15"                 
[67] "PtMAtGD-2"                   "PtMAtGD-3"                  
[69] "PtMAtGD-4"                   "PtMAtGD-5"                  
[71] "PtMAtGD-6"                   "PtMAtGD-7"                  
[73] "PtMAtGD-8"                   "PtMAtGD-9"                  
[75] "sociodemo_age"               "sociodemo_gender"           
[77] "sociodemo_sexualOrientation" "sociodemo_residency"        
[79] "lrscale"                     "rlgdgr"                     
[81] "feedback_critic"             "response_4"                 
[83] "response_3"                  "response_1"                 
[85] "response_2"

save all data sets

setwd("outputs/questionnaire")

# Every data set is stored twice under its own object name:
# as <name>.RData (R object) and as <name>.xlsx (Excel file).
for(tmp_name in c("questionnaire_c1", "questionnaire_c3",
                  "merged_data", "questionnaire_rating_long")){
  save(list = tmp_name, file = paste0(tmp_name, ".RData"))
  write_xlsx(get(tmp_name), paste0(tmp_name, ".xlsx"))
}
rm(tmp_name)

set up CAM data

pre CAM (component 2)

Load CAM data

setwd("outputs")
# Read the raw CAM file, split it into lines and drop empty lines (including
# bare carriage returns); every remaining element is one JSON document.
dat_CAM_pre <- suppressMessages(
  read_file("CAMdata.txt") %>%
    str_split("\n") %>%
    first() %>%
    discard(function(line) line == "") %>%
    discard(function(line) line == "\r")
)

# Parse every line into one raw CAM object. lapply() also copes with an empty
# file, for which the former 1:length() loop would have iterated over c(1, 0).
raw_CAM_pre <- lapply(dat_CAM_pre, function(line) jsonlite::fromJSON(txt = line))

Create CAM files, draw CAMs and compute network indicators

### create CAM single files (nodes, connectors, merged)
CAMfiles_pre <- create_CAMfiles(datCAM = raw_CAM_pre, reDeleted = TRUE)

Nodes and connectors, which were deleted by participants were removed. 
 # deleted nodes:  37 
 # deleted connectors:  14

# remove testing data sets
nrow(CAMfiles_pre[[1]])

[1] 261

# CAMfiles_pre[[1]] <- CAMfiles_pre[[1]][nchar(CAMfiles_pre[[1]]$participantCAM) == 24,]
# CAMfiles_pre[[2]] <- CAMfiles_pre[[2]][nchar(CAMfiles_pre[[2]]$participantCAM) == 24,]
# CAMfiles_pre[[3]] <- CAMfiles_pre[[3]][nchar(CAMfiles_pre[[3]]$participantCAM.x) == 24,]
# number of CAMs collected
# nrow(CAMfiles_pre[[1]])


# remove person who draw many empty concepts
# tmp_pid <- unique(CAMfiles_pre[[1]]$participantCAM[CAMfiles_pre[[1]]$CAM %in% c("a0c6edeb-267a-4f27-8199-79f896e033ce", "8d74f576-e617-4eb1-8ccf-93589ce6c65b")])
# print(tmp_pid)

## remove person from questionnaire data
# questionnaire <- questionnaire[!questionnaire$PROLIFIC_PID %in% tmp_pid,]

## remove person from CAM data
# table(CAMfiles_pre[[1]][CAMfiles_pre[[1]]$participantCAM %in% tmp_pid,]$text)
# CAMfiles_pre[[1]] <- CAMfiles_pre[[1]][!CAMfiles_pre[[1]]$participantCAM %in% tmp_pid,]
# CAMfiles_pre[[2]] <- CAMfiles_pre[[2]][!CAMfiles_pre[[2]]$participantCAM %in% tmp_pid,]
# CAMfiles_pre[[3]] <- CAMfiles_pre[[3]][!CAMfiles_pre[[3]]$participantCAM.x %in% tmp_pid,]


# remove 7 empty concepts:
# CAMfiles_pre[[1]]$text[nchar(CAMfiles_pre[[1]]$text) < 2]
# tmp_ids <- CAMfiles_pre[[1]]$id[nchar(CAMfiles_pre[[1]]$text) < 2]
# table(CAMfiles_pre[[1]]$isActive[CAMfiles_pre[[1]]$id %in% tmp_ids])
# table(CAMfiles_pre[[1]]$participantCAM[CAMfiles_pre[[1]]$id %in% tmp_ids])
# 
# CAMfiles_pre[[1]] <- CAMfiles_pre[[1]][!CAMfiles_pre[[1]]$id %in% tmp_ids,]


### draw CAMs
CAMdrawn_pre <- draw_CAM(dat_merged = CAMfiles_pre[[3]],
                     dat_nodes = CAMfiles_pre[[1]],ids_CAMs = "all",
                     plot_CAM = FALSE,
                     useCoordinates = TRUE,
                     relvertexsize = 3,
                     reledgesize = 1)

processing 18 CAMs...

Warning: `graph.data.frame()` was deprecated in igraph 2.0.0.
ℹ Please use `graph_from_data_frame()` instead.

[1] "== participantCAM in drawnCAM"

# Print all concept labels of every drawn CAM that contains at least one label
# shorter than three characters; seq_along() avoids iterating over c(1, 0)
# when no CAM was drawn.
for(i in seq_along(CAMdrawn_pre)){
  if(any(nchar(V(CAMdrawn_pre[[i]])$label) < 3)){
    print(V(CAMdrawn_pre[[i]])$label)
  }
}


### network indicators
tmp_microIndicator <- c("Konservativ", "Woke")
networkIndicators_pre <- compute_indicatorsCAM(drawn_CAM = CAMdrawn_pre, 
                                           micro_degree = tmp_microIndicator, 
                                           micro_valence = tmp_microIndicator, 
                                           micro_centr_clo = tmp_microIndicator, 
                                           micro_transitivity = tmp_microIndicator, 
                                           largestClique = FALSE)

Warning: `graph.density()` was deprecated in igraph 2.0.0.
ℹ Please use `edge_density()` instead.

Warning: The `types1` argument of `assortativity()` is deprecated as of igraph 1.6.0.
ℹ Please use the `values` argument instead.

Warning: `assortativity.degree()` was deprecated in igraph 2.0.0.
ℹ Please use `assortativity_degree()` instead.

Warning: `as.undirected()` was deprecated in igraph 2.1.0.
ℹ Please use `as_undirected()` instead.

### wordlist
CAMwordlist_pre <- create_wordlist(
  dat_nodes =  CAMfiles_pre[[1]],
  dat_merged =  CAMfiles_pre[[3]],
  useSummarized = TRUE,
  order = "frequency",
  splitByValence = FALSE,
  comments = TRUE,
  raterSubsetWords = NULL,
  rater = FALSE
)

[1] "create_wordlist - use raw words"
[1] 0
[1] 261
[1] "temporarily suffixes are added, because not all words have been summarized"
processing 18 CAMs... 
[1] "== participantCAM in drawnCAM"

if(all(nchar(CAMwordlist_pre$Words) > 2)){
  print("sucessfully removed empty words")
}

DT::datatable(CAMwordlist_pre, options = list(pageLength = 5))

save CAMs as .json files, and as .png (igraph)

# Export every drawn CAM as .png (igraph plot) and every raw CAM with more
# than five nodes as .json; files of former runs are deleted first.
if(save_CAMs_as_pictures){
  setwd("outputs")

  setwd("savedCAMs")
  setwd("png")
  ### remove all files if there are any
  if(length(list.files()) >= 1){
    file.remove(list.files())
    cat("\n!\n      all former .png files have been deleted")
  }

  ### if no participant ID was provided replace by randomly generated CAM ID
  if(all(CAMfiles_pre[[3]]$participantCAM.x == "noID")){
    CAMfiles_pre[[3]]$participantCAM.x <- CAMfiles_pre[[3]]$CAM.x
  }

  ### save as .json files, and as .png (igraph)
  ids_CAMs <- unique(CAMfiles_pre[[3]]$participantCAM.x)

  for(i in seq_along(ids_CAMs)){
    # look the drawn CAM up by name before a graphics device is opened, so
    # that a missing CAM does not leave an open device behind
    CAM_igraph <- CAMdrawn_pre[[which(names(CAMdrawn_pre) == paste0(ids_CAMs[i]))]]

    save_graphic(filename = paste0("CAM", "_t1_", ids_CAMs[i])) #  paste0(ids_CAMs[i]))
    # the device is closed even if plotting fails
    tryCatch(
      plot(CAM_igraph, edge.arrow.size = .7,
           layout=layout_nicely, vertex.frame.color="black", asp = .5, margin = -0.1,
           vertex.size = 10, vertex.label.cex = .9),
      finally = dev.off()
    )
  }

  setwd("../json")
  ### remove all files if there are any
  if(length(list.files()) >= 1){
    file.remove(list.files())
    cat("\n!\n      all former .json files have been deleted")
  }
  # only CAMs with more than five nodes are written, named by their creator
  for(i in seq_along(raw_CAM_pre)){
    if(!is_empty(raw_CAM_pre[[i]]$nodes) && nrow(raw_CAM_pre[[i]]$nodes) > 5){
      write(toJSON(raw_CAM_pre[[i]], encoding = "UTF-8"),
            paste0(raw_CAM_pre[[i]]$creator, ".json"))
    }
  }
}


!
      all former .png files have been deleted


!
      all former .json files have been deleted

merge and save all data

setwd("outputs/final")


# The CAM IDs are attached to the network indicators only when the
# participant IDs of the node data appear in exactly the same order as in the
# network indicators. The length check prevents silent recycling in the
# comparison and isTRUE() covers missing IDs.
# NOTE(review): assumes unique(CAM) and unique(participantCAM) of the node
# data are ordered identically (one CAM per participant) - confirm.
if(length(unique(CAMfiles_pre[[1]]$participantCAM)) == nrow(networkIndicators_pre) &&
   isTRUE(all(unique(CAMfiles_pre[[1]]$participantCAM) == networkIndicators_pre$participantCAM))){
  print("pre CAM ID can be set")
  networkIndicators_pre$CAM_ID <- unique(CAMfiles_pre[[1]]$CAM)
} else {
  warning("pre CAM ID could not be set: participant IDs of node data and network indicators differ")
}

[1] "pre CAM ID can be set"

### remove all previously removed participants
nrow(networkIndicators_pre)

[1] 18

nrow(merged_data)

[1] 18

nrow(networkIndicators_pre)

[1] 18

### match data
# All final files are written only when questionnaire data and network
# indicators describe the same participants in the same row order, because
# both data sets are combined column-wise with cbind() below. The row-count
# check prevents silent recycling in the comparison and isTRUE() covers
# missing IDs; a failed match is reported instead of being skipped silently.
if (nrow(merged_data) == nrow(networkIndicators_pre) &&
    isTRUE(all(merged_data$PROLIFIC_PID == networkIndicators_pre$participantCAM))) {
  print("all data can be matched row by row")

  # save questionnaire
  merged_data$participantCAMpre <- networkIndicators_pre$participantCAM

  ## save as .xlsx file
  xlsx::write.xlsx2(x = merged_data, file = "merged_data_final.xlsx")
  ## save as R object
  saveRDS(merged_data, file = "merged_data_final.rds")


  # save network indicators pre
  ## save as .xlsx file
  xlsx::write.xlsx2(x = networkIndicators_pre, file = "networkIndicators_pre_final.xlsx")
  ## save as R object
  saveRDS(networkIndicators_pre, file = "networkIndicators_pre_final.rds")


  # save CAMfiles pre
  saveRDS(CAMfiles_pre, file = "CAMfiles_pre_final.rds")

  # save CAMfiles combined and clean
  # saveRDS(CAMfiles_combined_translated, file = "CAMfiles_combined_final_translated.rds")


  # questionnaire data and network indicators side by side (row-aligned)
  merged_dataCAMs <- cbind(merged_data, networkIndicators_pre)

  ## save as .xlsx file
  xlsx::write.xlsx2(x = merged_dataCAMs, file = "merged_dataCAMs_final.xlsx")
  ## save as .csv file
  write.csv2(x = merged_dataCAMs, file = "merged_dataCAMs_final.csv")
  ## save as R object
  saveRDS(merged_dataCAMs, file = "merged_dataCAMs_final.rds")

  ### save wordlist
  ## save as .xlsx file
  xlsx::write.xlsx2(x = CAMwordlist_pre, file = "CAMwordlist_pre_final.xlsx")
  ## save as .csv file
  write.csv2(x = CAMwordlist_pre, file = "CAMwordlist_pre_final.csv")
  ## save as R object
  saveRDS(CAMwordlist_pre, file = "CAMwordlist_pre_final.rds")
} else {
  warning("questionnaire data and network indicators cannot be matched row by row; no final files were saved")
}

[1] "all data can be matched row by row"

References

Peng, Roger D., and Elizabeth Matsui. 2016. The Art of Data Science: A Guide for Anyone Who Works with Data. Lulu.com. https://bookdown.org/rdpeng/artofdatascience/.

Wickham, Hadley, and Garrett Grolemund. 2017. R for Data Science: Import, Tidy, Transform, Visualize, and Model Data. O’Reilly Media, Inc. https://r4ds.had.co.nz/.

Xie, Yihui, J. J. Allaire, and Garrett Grolemund. 2018. R Markdown: The Definitive Guide. New York: Chapman and Hall/CRC. https://doi.org/10.1201/9781138359444.