Data preparation for S2 main CAM study

Author

Julius Fenn, Louisa Estadieu

Notes

create raw data files

# set the directory of this script as the working directory
# setwd(dirname(rstudioapi::getSourceEditorContext()$path))

### load packages
require(pacman)
p_load('tidyverse', 'jsonlite', 'magrittr', 'xlsx',
       'stargazer', 'psych', 'jtools', 'DT', 'ggstatsplot', 
       'lavaan', 
       'regsem', 'MplusAutomation', 'igraph')
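Note: require(pacman) assumes the pacman package is already installed; if in doubt, it can be bootstrapped first (a minimal sketch):

# install pacman once if it is missing; p_load then installs/loads the rest
if (!requireNamespace("pacman", quietly = TRUE)) install.packages("pacman")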


### load socio-demographic data
setwd("data_demographic")
prolific1 <- read.csv(file = "prolific_export_6576dab8c6f593e69b2c4246.csv", header = TRUE)
prolific2 <- read.csv(file = "prolific_export_657827d7dc1b45092f59eb51.csv", header = TRUE)

prolific <- rbind(prolific1, prolific2)



### list data files
setwd("../data")
folders <- list.files(pattern = "^study_result.*")

### create data files - GERMANY
# get CAM data
writeLines("", "CAMdata.txt") # create file
text_connection <- file("CAMdata.txt", "a") # open connection to append

# get CAM data second
writeLines("", "secondCAMdata.txt") # create file
text_connection_second <- file("secondCAMdata.txt", "a") # open connection to append

# get pre CAM data
writeLines("", "preCAM.txt") # create file
text_connection_pre <- file("preCAM.txt", "a") # open connection to append

# get post CAM data
writeLines("", "postCAM.txt") # create file
text_connection_post <- file("postCAM.txt", "a") # open connection to append

# get post second CAM data
writeLines("", "secondPostCAM.txt") # create file
text_connection_postSecond <- file("secondPostCAM.txt", "a") # open connection to append

for(i in 1:length(folders)){
  setwd(folders[i])
  if(length(dir()) == 5){
    # print(i)
    
    
    # pre CAM data
    setwd(dir()[1])
    tmp_pre <- jsonlite::fromJSON(txt = "data.txt")
    writeLines(jsonlite::toJSON(x = tmp_pre), text_connection_pre)
    setwd("..")
    
    
    # CAM data
    setwd(dir()[2])
    tmp <- jsonlite::fromJSON(txt = "data.txt")
    
    # add Prolific PID
    if(sum(!is.na(tmp_pre$PROLIFIC_PID)) != 1){
      cat("Error in assigning prolific PID in index", i, "\n")
      break
    }
    
    tmp$creator <- tmp_pre$PROLIFIC_PID[!is.na(tmp_pre$PROLIFIC_PID)]
    writeLines(jsonlite::toJSON(x = tmp), text_connection)
    setwd("..")

    # post CAM data
    setwd(dir()[3])
    tmp <- jsonlite::fromJSON(txt = "data.txt")
    
    # add Prolific PID
    tmp$PROLIFIC_PID <- NA
    tmp$PROLIFIC_PID[2] <- tmp_pre$PROLIFIC_PID[!is.na(tmp_pre$PROLIFIC_PID)]
    
    writeLines(jsonlite::toJSON(x = tmp), text_connection_post)
    setwd("..")
    
    # CAM data second
    setwd(dir()[4])
    tmp <- jsonlite::fromJSON(txt = "data.txt")
    
    ## add Prolific PID
    tmp$creator <- tmp_pre$PROLIFIC_PID[!is.na(tmp_pre$PROLIFIC_PID)]
    
    writeLines(jsonlite::toJSON(x = tmp), text_connection_second)
    setwd("..")
    
    # post CAM data second
    setwd(dir()[5])
    tmp <- jsonlite::fromJSON(txt = "data.txt")
    
    # add Prolific PID
    tmp$PROLIFIC_PID <- NA
    tmp$PROLIFIC_PID[2] <- tmp_pre$PROLIFIC_PID[!is.na(tmp_pre$PROLIFIC_PID)]
    
    writeLines(jsonlite::toJSON(x = tmp), text_connection_postSecond)
    setwd("..")
  }
  setwd("..")
}

close(text_connection) # close connection CAM
close(text_connection_pre) # close connection
close(text_connection_post) # close connection
close(text_connection_second) # close connection CAM
close(text_connection_postSecond) # close connection

### move files to output folder
# copy files (not overwritten)
tmp_file_from <-  getwd()
setwd("../outputs/01_dataPreperation")
file.copy(from =  paste0(tmp_file_from, "/CAMdata.txt"), to = paste0(getwd(), "/CAMdata.txt"))
[1] FALSE
file.copy(from =  paste0(tmp_file_from, "/preCAM.txt"), to = paste0(getwd(), "/preCAM.txt"))
[1] FALSE
file.copy(from =  paste0(tmp_file_from, "/postCAM.txt"), to = paste0(getwd(), "/postCAM.txt"))
[1] FALSE
file.copy(from =  paste0(tmp_file_from, "/secondPostCAM.txt"), to = paste0(getwd(), "/secondPostCAM.txt"))
[1] FALSE
file.copy(from =  paste0(tmp_file_from, "/secondCAMdata.txt"), to = paste0(getwd(), "/secondCAMdata.txt"))
[1] FALSE
# remove files
file.remove(paste0(tmp_file_from, "/CAMdata.txt"))
[1] TRUE
file.remove(paste0(tmp_file_from, "/preCAM.txt"))
[1] TRUE
file.remove(paste0(tmp_file_from, "/postCAM.txt"))
[1] TRUE
file.remove(paste0(tmp_file_from, "/secondPostCAM.txt"))
[1] TRUE
file.remove(paste0(tmp_file_from, "/secondCAMdata.txt"))
[1] TRUE
### load functions
# print(getwd())
setwd("../../../../functions")
for(i in 1:length(dir())){
  # print(dir()[i])
  source(dir()[i], encoding = "utf-8")
}


setwd("../functions_CAMapp")
for(i in 1:length(dir())){
  # print(dir()[i])
  source(dir()[i], encoding = "utf-8")
}
rm(i)



### summary function
data_summary <- function(data, varname, groupnames){
  require(plyr)
  summary_func <- function(x, col){
    c(mean = mean(x[[col]], na.rm=TRUE),
      se = sd(x[[col]], na.rm=TRUE) / sqrt(length(x[[col]])))
  }
  data_sum<-ddply(data, groupnames, .fun=summary_func,
                  varname)
  data_sum <- plyr::rename(data_sum, c("mean" = varname))
  return(data_sum)
}
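A hypothetical usage, assuming the dat_duration data.frame built further below (columns "duration" and "sender"):

# returns one row per sender with the mean (renamed to "duration") and its standard error
# data_summary(data = dat_duration, varname = "duration", groupnames = "sender")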

set up data.frame questionnaires

setwd("outputs/01_dataPreperation")
# > pre study
suppressMessages(read_file('preCAM.txt') %>%
                   # ... split it into lines ...
                   str_split('\n') %>% first() %>%
                   # ... filter empty rows ...
                   discard(function(x) x == '') %>%
                   discard(function(x) x == '\r') %>%
                   # ... parse JSON into a data.frame
                   map_dfr(fromJSON, flatten=TRUE)) -> dat_preCAM

# > post first CAM
suppressMessages(read_file('postCAM.txt') %>%
                   # ... split it into lines ...
                   str_split('\n') %>% first() %>%
                   # ... filter empty rows ...
                   discard(function(x) x == '') %>%
                   discard(function(x) x == '\r') %>%
                   # ... parse JSON into a data.frame
                   map_dfr(fromJSON, flatten=TRUE)) -> dat_postCAM

# > post second CAM
suppressMessages(read_file('secondPostCAM.txt') %>%
                   # ... split it into lines ...
                   str_split('\n') %>% first() %>%
                   # ... filter empty rows ...
                   discard(function(x) x == '') %>%
                   discard(function(x) x == '\r') %>%
                   # ... parse JSON into a data.frame
                   map_dfr(fromJSON, flatten=TRUE)) -> dat_secondPostCAM
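Since the same parsing pipeline is used three times, it could be wrapped in a small helper (a sketch; read_jsonlines is a hypothetical name):

read_jsonlines <- function(path) {
  suppressMessages(
    read_file(path) %>%
      # split into lines, drop empty rows, parse JSON into a data.frame
      str_split('\n') %>% first() %>%
      discard(~ .x %in% c('', '\r')) %>%
      map_dfr(fromJSON, flatten = TRUE)
  )
}
# e.g. dat_preCAM <- read_jsonlines('preCAM.txt')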



### create counter variable for the three data sets
# pre study
dat_preCAM$ID <- NA

tmp_IDcounter <- 0
for(i in 1:nrow(dat_preCAM)){
  if(!is.na(dat_preCAM$sender[i]) && dat_preCAM$sender[i] == "Greetings"){
    # tmp <- dat_preCAM$prolific_pid[i]
    tmp_IDcounter = tmp_IDcounter + 1
  }
  dat_preCAM$ID[i] <- tmp_IDcounter
}



# post study
dat_postCAM$ID <- NA

tmp_IDcounter <- 0
for(i in 1:nrow(dat_postCAM)){
  if(!is.na(dat_postCAM$sender[i]) && dat_postCAM$sender[i] == "CAMfeedbackGeneral"){
    # tmp <- dat_postCAM$prolific_pid[i]
    tmp_IDcounter = tmp_IDcounter + 1
  }
  dat_postCAM$ID[i] <- tmp_IDcounter
}

# second post study
#> fix error in "sender" variable
for(i in 1:nrow(dat_secondPostCAM)){
  if(is.na(dat_secondPostCAM$sender[i])){
    if(!is.na(dat_secondPostCAM$sender[i+1])){
      dat_secondPostCAM$sender[i] <- "adaptiveAnswer"
    }
  }
}



dat_secondPostCAM$ID <- NA
tmp_IDcounter <- 0
for(i in 1:nrow(dat_secondPostCAM)){
  if(!is.na(dat_secondPostCAM$sender[i]) && dat_secondPostCAM$sender[i] == "adaptiveAnswer"){
    # tmp <- dat_secondPostCAM$prolific_pid[i]
    tmp_IDcounter = tmp_IDcounter + 1
  }
  dat_secondPostCAM$ID[i] <- tmp_IDcounter
}
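Each of the three counter loops is equivalent to a single vectorized cumsum() over the respective marker event (a sketch, same logic):

# e.g. for the pre study: every "Greetings" row starts a new participant block
# dat_preCAM$ID <- cumsum(!is.na(dat_preCAM$sender) & dat_preCAM$sender == "Greetings")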



### keep only complete data sets
# pre-study
# sort(table(dat_preCAM$ID))
sum(table(dat_preCAM$ID) != max(table(dat_preCAM$ID)))
[1] 0
sum(table(dat_preCAM$ID) == max(table(dat_preCAM$ID)))
[1] 227
dat_preCAM <- dat_preCAM[dat_preCAM$ID %in% names(table(dat_preCAM$ID))[table(dat_preCAM$ID) == max(table(dat_preCAM$ID))],]

# post-study
# sort(table(dat_postCAM$ID))
sum(table(dat_postCAM$ID) != max(table(dat_postCAM$ID)))
[1] 2
sum(table(dat_postCAM$ID) == max(table(dat_postCAM$ID)))
[1] 226
# dat_postCAM <- dat_postCAM[dat_postCAM$ID %in% names(table(dat_postCAM$ID))[table(dat_postCAM$ID) == max(table(dat_postCAM$ID))],]
dat_postCAM <- dat_postCAM[dat_postCAM$ID %in% names(table(dat_postCAM$ID))[table(dat_postCAM$ID) >= 4],]

# post-study second
# sort(table(dat_secondPostCAM$ID))
sum(table(dat_secondPostCAM$ID) != max(table(dat_secondPostCAM$ID)))
[1] 2
sum(table(dat_secondPostCAM$ID) == max(table(dat_secondPostCAM$ID)))
[1] 226
# dat_secondPostCAM <- dat_secondPostCAM[dat_secondPostCAM$ID %in% names(table(dat_secondPostCAM$ID))[table(dat_secondPostCAM$ID) == max(table(dat_secondPostCAM$ID))],]
dat_secondPostCAM <- dat_secondPostCAM[dat_secondPostCAM$ID %in% names(table(dat_secondPostCAM$ID))[table(dat_secondPostCAM$ID) >= 11],]
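The repeated completeness filter could also be factored out (a sketch; keep_IDs_with_min_rows is a hypothetical name):

keep_IDs_with_min_rows <- function(dat, min_rows) {
  tab <- table(dat$ID)
  dat[dat$ID %in% names(tab)[tab >= min_rows], ]
}
# e.g. dat_postCAM <- keep_IDs_with_min_rows(dat_postCAM, min_rows = 4)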




### json (from JATOS) to 2D data.frame
# pre-study
tmp_notNumeric <- str_subset(string = colnames(dat_preCAM), pattern = "^meta|^R")
tmp_notNumeric <- str_subset(string = tmp_notNumeric, pattern = "labjs|location", negate = TRUE)

vec_ques <- c("PROLIFIC_PID",
                "choosen_Robot", 
              "dummy_informedconsent", 
              "commCheck", tmp_notNumeric)

vec_notNumeric = c("PROLIFIC_PID",
                "choosen_Robot", tmp_notNumeric)

questionnaire_preCAM <- questionnairetype(dataset = dat_preCAM, 
                                        listvars = vec_ques, 
                                        notNumeric = vec_notNumeric, verbose = FALSE)


dim(questionnaire_preCAM)
[1] 227  18
# post-study
vec_ques <- c("PROLIFIC_PID",
              "feedCAM_repres", "feedCAM_technicalprobs", "feedCAM_technicalprobsText",
              "feedCAM_already", "feedCAM_alreadyText")

vec_notNumeric = c("PROLIFIC_PID", 
                   "feedCAM_technicalprobsText", "feedCAM_alreadyText")

questionnaire_postCAM <- questionnairetype(dataset = dat_postCAM, 
                                        listvars = vec_ques, 
                                        notNumeric = vec_notNumeric, verbose = FALSE)

# post-study second
tmp_numeric <- str_subset(string = colnames(dat_secondPostCAM), pattern = "^GAToRS|^Almere|^LiWang")


vec_ques <- c("PROLIFIC_PID", 
              "ans1",
                tmp_numeric,
                "feedback_critic")

vec_notNumeric = c("PROLIFIC_PID",
                   "ans1",
                   "feedback_critic")

questionnaire_secondPostCAM <- questionnairetype(dataset = dat_secondPostCAM, 
                                        listvars = vec_ques, 
                                        notNumeric = vec_notNumeric, verbose = FALSE)


dim(questionnaire_secondPostCAM)
[1] 227  39
### merge all data sets
questionnaire <-  left_join(x = questionnaire_preCAM, y = questionnaire_postCAM, by='PROLIFIC_PID') %>%
                left_join(., questionnaire_secondPostCAM, by='PROLIFIC_PID') 

questionnaire$ID.x <- NULL
questionnaire$ID.y <- NULL

dim(questionnaire)
[1] 227  60
# change name of ans1
names(questionnaire)[names(questionnaire) == "ans1"] <- "adaptiveQuestion"

# remove testing data sets
questionnaire <- questionnaire[nchar(questionnaire$PROLIFIC_PID) == 24,]


### add prolific data
prolific <- prolific[prolific$Participant.id %in% questionnaire$PROLIFIC_PID,]
prolific <- prolific %>%
  arrange(sapply(Participant.id, function(y) which(y == questionnaire$PROLIFIC_PID)))
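The sapply()-based reordering is equivalent to a single match() call (a sketch, assuming every questionnaire PID appears exactly once in prolific):

# prolific <- prolific[match(questionnaire$PROLIFIC_PID, prolific$Participant.id), ]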


if(nrow(prolific) == nrow(questionnaire)){
  print("prolific data sucessfully added")
  
  questionnaire$socio_age <- prolific$Age
  questionnaire$socio_sex <- prolific$Sex
  questionnaire$socio_ethnicity <- prolific$Ethnicity.simplified
  questionnaire$socio_student <- prolific$Student.status
  questionnaire$socio_employment <- prolific$Employment.status
  questionnaire$total_min_prolific <- prolific$Time.taken / 60
  ## all time outs to NA
  questionnaire$total_min_prolific[questionnaire$total_min_prolific > 1000] <- NA
  ## all expired data to NA
  questionnaire[questionnaire == "DATA_EXPIRED"] <- NA
  
  questionnaire$socio_age <- as.numeric(questionnaire$socio_age)
}
[1] "prolific data sucessfully added"
## all missing answers to NA
questionnaire[questionnaire == ""] <- NA
## all feedback smaller than 3 characters to NA
questionnaire$feedback_critic[nchar(questionnaire$feedback_critic) <= 2] <- NA

### save files
## save as .xlsx file
xlsx::write.xlsx2(x = questionnaire, file = "questionnaire.xlsx")
## save as R object
saveRDS(questionnaire, file = "questionnaire.rds")

get reaction times for single components

Plot the time taken by participants for the single components of the study (durations are converted from milliseconds to seconds):

dat_duration <- data.frame(duration = NA, sender = NA, ID = NA, PROLIFIC_PID = NA)

for(i in 1:length(unique(dat_secondPostCAM$ID))){

  tmp_PID <- dat_secondPostCAM$PROLIFIC_PID[dat_secondPostCAM$ID ==  unique(dat_secondPostCAM$ID)[i] & !is.na(dat_secondPostCAM$PROLIFIC_PID)]
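  # note: this indexing assumes the i-th unique ID refers to the same
  # participant in dat_preCAM, dat_postCAM and dat_secondPostCAM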
  
  

     # pre CAM
    tmp_preCAM <- data.frame(duration = dat_preCAM$duration[dat_preCAM$ID == unique(dat_preCAM$ID)[i]] / 1000,
                    sender = dat_preCAM$sender[dat_preCAM$ID == unique(dat_preCAM$ID)[i]])
    tmp_preCAM <- tmp_preCAM[!is.na(tmp_preCAM$sender),]
    
     # post CAM
    tmp_postCAM <- data.frame(duration = dat_postCAM$duration[dat_postCAM$ID == unique(dat_postCAM$ID)[i]] / 1000,
                    sender = dat_postCAM$sender[dat_postCAM$ID == unique(dat_postCAM$ID)[i]])
    tmp_postCAM <- tmp_postCAM[!is.na(tmp_postCAM$sender),]
    
    # second post CAM
    tmp_secondPostCAM <- data.frame(duration = dat_secondPostCAM$duration[dat_secondPostCAM$ID == unique(dat_secondPostCAM$ID)[i]] / 1000,
                    sender = dat_secondPostCAM$sender[dat_secondPostCAM$ID == unique(dat_secondPostCAM$ID)[i]])
    tmp_secondPostCAM <- tmp_secondPostCAM[!is.na(tmp_secondPostCAM$sender),] 
  
    tmp <- rbind(tmp_preCAM, tmp_postCAM, tmp_secondPostCAM)
    

  if(all(is.na(dat_duration))){
    dat_duration <- data.frame(duration = tmp$duration,
                              sender = tmp$sender,
                              ID = rep(i, times=nrow(tmp)),
                              PROLIFIC_PID = rep(tmp_PID, times=nrow(tmp)))


  }else{
    dat_duration <- rbind(dat_duration,  data.frame(duration = tmp$duration,
                                                    sender = tmp$sender,
                                                    ID = rep(i, times=nrow(tmp)),
                                                    PROLIFIC_PID = rep(tmp_PID, times=nrow(tmp))))
  }
}

## remove empty sender 
dat_duration <- dat_duration[!is.na(dat_duration$sender), ]
dat_duration <- dat_duration[!is.na(dat_duration$duration), ]

dat_duration$sender[dat_duration$sender == "done"] <- "CAM instructions"

## save as .xlsx
# write.xlsx2(x = dat_duration, file = "outputs/para_duration_singleComponents.xlsx")

#### plot
dat_duration$ID <- factor(dat_duration$ID)
p <- dat_duration %>%
  ggplot(aes(x=sender, y=duration, color=PROLIFIC_PID)) +
  geom_point() +
  geom_jitter(width=0.15)+
  theme(axis.text.x = element_text(angle = 90)) + theme(legend.position="none")
p

## save ggplot as PDF
ggsave(filename = "outputs/01_dataPreperation/durations_components.pdf", p)


# Calculate the mean duration in seconds for each sender and sort by mean duration
tmp <- dat_duration %>%
  group_by(sender) %>%
  summarise(N = n(), mean_duration = mean(duration, na.rm = TRUE)) %>%
  arrange(desc(mean_duration))
DT::datatable(tmp, options = list(pageLength = 5)) 

set up CAM data

pre

Load CAM data

setwd("outputs/01_dataPreperation")
suppressMessages(read_file("CAMdata.txt") %>%
  # ... split it into lines ...
  str_split('\n') %>% first() %>%
    discard(function(x) x == '') %>%
    discard(function(x) x == '\r') %>%
  # ... filter empty rows ...
  discard(function(x) x == '')) -> dat_CAM_pre

raw_CAM_pre <- list()
for(i in 1:length(dat_CAM_pre)){
  raw_CAM_pre[[i]] <- jsonlite::fromJSON(txt = dat_CAM_pre[[i]])
}
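Equivalently, the parse loop could be written with lapply() (a sketch):

# raw_CAM_pre <- lapply(dat_CAM_pre, function(x) jsonlite::fromJSON(txt = x))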

Create CAM files, draw CAMs and compute network indicators

### create CAM single files (nodes, connectors, merged)
CAMfiles_pre <- create_CAMfiles(datCAM = raw_CAM_pre, reDeleted = TRUE)
Nodes and connectors, which were deleted by participants were removed. 
 # deleted nodes:  352 
 # deleted connectors:  96
# remove testing data sets
CAMfiles_pre[[1]] <- CAMfiles_pre[[1]][nchar(CAMfiles_pre[[1]]$participantCAM) == 24,]
CAMfiles_pre[[2]] <- CAMfiles_pre[[2]][nchar(CAMfiles_pre[[2]]$participantCAM) == 24,]
CAMfiles_pre[[3]] <- CAMfiles_pre[[3]][nchar(CAMfiles_pre[[3]]$participantCAM.x) == 24,]

# remove CAM with many empty concepts
## remove person from survey data
tmp_pid <- unique(CAMfiles_pre[[1]]$participantCAM[CAMfiles_pre[[1]]$CAM %in% c("a0c6edeb-267a-4f27-8199-79f896e033ce", "8d74f576-e617-4eb1-8ccf-93589ce6c65b")])
questionnaire <- questionnaire[!questionnaire$PROLIFIC_PID %in% tmp_pid,]

## remove person from CAM data
CAMfiles_pre[[1]] <- CAMfiles_pre[[1]][CAMfiles_pre[[1]]$CAM != "a0c6edeb-267a-4f27-8199-79f896e033ce",]
CAMfiles_pre[[2]] <- CAMfiles_pre[[2]][CAMfiles_pre[[2]]$CAM != "a0c6edeb-267a-4f27-8199-79f896e033ce",]
CAMfiles_pre[[3]] <- CAMfiles_pre[[3]][CAMfiles_pre[[3]]$CAM.x != "a0c6edeb-267a-4f27-8199-79f896e033ce",]




# remove empty concepts:
CAMfiles_pre[[1]]$text[nchar(CAMfiles_pre[[1]]$text) < 2]
[1] "" "" "" "" "" "" ""
tmp_ids <- CAMfiles_pre[[1]]$id[nchar(CAMfiles_pre[[1]]$text) < 2]
table(CAMfiles_pre[[1]]$isActive[CAMfiles_pre[[1]]$id %in% tmp_ids])

TRUE 
   7 
CAMfiles_pre[[1]] <- CAMfiles_pre[[1]][!CAMfiles_pre[[1]]$id %in% tmp_ids,]


### draw CAMs
CAMdrawn_pre <- draw_CAM(dat_merged = CAMfiles_pre[[3]],
                     dat_nodes = CAMfiles_pre[[1]],ids_CAMs = "all",
                     plot_CAM = FALSE,
                     useCoordinates = TRUE,
                     relvertexsize = 3,
                     reledgesize = 1)
processing 225 CAMs... 
[1] "== participantCAM in drawnCAM"
for(i in 1:length(CAMdrawn_pre)){
  if(any(nchar(V(CAMdrawn_pre[[i]])$label) < 3)){
    print(V(CAMdrawn_pre[[i]])$label)
  }
}
 [1] "Rettungsroboter"          "Nachteile"               
 [3] "Vorteile"                 "Einsatz"                 
 [5] "in Gefahrenzonen"         "in Höhlen"               
 [7] "robustes Material"        "keine Ablenkung"         
 [9] "AI"                       "Wen zuerst retten?"      
[11] "Autonomes Handeln?"       "Verantwortlich für Leben"
[13] "Ethik"                    "Giftige Orte"            
[15] "Unter Wasser"             "wiederstandsfähig"       
[17] "durch Emotionen"          "durch Verletzungen"      
[19] "durch Hunger/Durst"       "Welche Priotiäten?"      
[21] "Falsche Bevorzugung"     
 [1] "Roboter"                  "Vorteile"                
 [3] "Nachteile"                "Schlechte Programmierung"
 [5] "Übermütig"                "Ausnutzen"               
 [7] "Teuer"                    "Menschlichkeit lernen"   
 [9] "Menschliche Natur"        "Arbeitsentlasung"        
[11] "Mehr Rettungen"           "Technischer Fortschritt" 
[13] "KI"                      
### network indicators
tmp_microIndicator <- c("Rettungsroboter", "sozialer Assistenzroboter", "Vorteile", "Nachteile")
networkIndicators_pre <- compute_indicatorsCAM(drawn_CAM = CAMdrawn_pre, 
                                           micro_degree = tmp_microIndicator, 
                                           micro_valence = tmp_microIndicator, 
                                           micro_centr_clo = tmp_microIndicator, 
                                           micro_transitivity = tmp_microIndicator, 
                                           largestClique = FALSE)


### wordlist
CAMwordlist_pre <- create_wordlist(
  dat_nodes =  CAMfiles_pre[[1]],
  dat_merged =  CAMfiles_pre[[3]],
  useSummarized = TRUE,
  order = "frequency",
  splitByValence = FALSE,
  comments = TRUE,
  raterSubsetWords = NULL,
  rater = FALSE
)
[1] "create_wordlist - use raw words"
[1] 0
[1] 3076
[1] "temporarily suffixes are added, because not all words have been summarized"
processing 225 CAMs... 
[1] "== participantCAM in drawnCAM"
if(all(nchar(CAMwordlist_pre$Words) > 2)){
  print("successfully removed empty words")
}

DT::datatable(CAMwordlist_pre, options = list(pageLength = 5)) 

save CAMs as .json files, and as .png (igraph)

save_CAMs_as_pictures = FALSE

if(save_CAMs_as_pictures){
  setwd("outputs/01_dataPreperation")

  setwd("savedCAMs_pre")
  setwd("png")
  ### remove all files if there are any
  if(length(list.files()) >= 1){
    file.remove(list.files())
    cat('\n! all former .png files have been deleted\n')
  }

  ### if no participant ID was provided replace by randomly generated CAM ID
  if(all(CAMfiles_pre[[3]]$participantCAM.x == "noID")){
    CAMfiles_pre[[3]]$participantCAM.x <- CAMfiles_pre[[3]]$CAM.x
  }

  ### save as .json files, and as .png (igraph)
  ids_CAMs <- unique(CAMfiles_pre[[3]]$participantCAM.x); length(ids_CAMs)

  for(i in 1:length(ids_CAMs)){
    save_graphic(filename = paste0("CAM", "_t1_", ids_CAMs[i])) #  paste0(ids_CAMs[i]))
    CAM_igraph <- CAMdrawn_pre[[c(1:length(CAMdrawn_pre))[
      names(CAMdrawn_pre) == paste0(unique(CAMfiles_pre[[3]]$participantCAM.x)[i])]]]
    plot(CAM_igraph, edge.arrow.size = .7,
         layout=layout_nicely, vertex.frame.color="black", asp = .5, margin = -0.1,
         vertex.size = 10, vertex.label.cex = .9)
    dev.off()
  }

  setwd("../json")
  ### remove all files if there are any
  if(length(list.files()) >= 1){
    file.remove(list.files())
    cat('\n! all former .json files have been deleted\n')
  }
  for(i in 1:length(raw_CAM_pre)){
    if(!is_empty(raw_CAM_pre[[i]]$nodes)){
      if(nrow(raw_CAM_pre[[i]]$nodes) > 5){
        write(toJSON(raw_CAM_pre[[i]], encoding = "UTF-8"),
              paste0(raw_CAM_pre[[i]]$creator, ".json"))
      }
    }
  }
}

post

Load CAM data

setwd("outputs/01_dataPreperation")
suppressMessages(read_file("secondCAMdata.txt") %>%
  # ... split it into lines ...
  str_split('\n') %>% first() %>%
    discard(function(x) x == '') %>%
    discard(function(x) x == '\r') %>%
  # ... filter empty rows ...
  discard(function(x) x == '')) -> dat_CAM_post

raw_CAM_post <- list()
for(i in 1:length(dat_CAM_post)){
  raw_CAM_post[[i]] <- jsonlite::fromJSON(txt = dat_CAM_post[[i]])
}

Create CAM files, draw CAMs and compute network indicators

### create CAM single files (nodes, connectors, merged)
CAMfiles_post <- create_CAMfiles(datCAM = raw_CAM_post, reDeleted = TRUE)
Nodes and connectors, which were deleted by participants were removed. 
 # deleted nodes:  148 
 # deleted connectors:  80
# remove testing data sets
CAMfiles_post[[1]] <- CAMfiles_post[[1]][nchar(CAMfiles_post[[1]]$participantCAM) == 24,]
CAMfiles_post[[2]] <- CAMfiles_post[[2]][nchar(CAMfiles_post[[2]]$participantCAM) == 24,]
CAMfiles_post[[3]] <- CAMfiles_post[[3]][nchar(CAMfiles_post[[3]]$participantCAM.x) == 24,]

# remove CAM with many empty concepts
CAMfiles_post[[1]] <- CAMfiles_post[[1]][CAMfiles_post[[1]]$CAM != "8d74f576-e617-4eb1-8ccf-93589ce6c65b",]
CAMfiles_post[[2]] <- CAMfiles_post[[2]][CAMfiles_post[[2]]$CAM != "8d74f576-e617-4eb1-8ccf-93589ce6c65b",]
CAMfiles_post[[3]] <- CAMfiles_post[[3]][CAMfiles_post[[3]]$CAM.x != "8d74f576-e617-4eb1-8ccf-93589ce6c65b",]


# remove empty concepts:
CAMfiles_post[[1]]$text[nchar(CAMfiles_post[[1]]$text) < 2  & CAMfiles_post[[1]]$text != "f"]
[1] "" "" "" ""
tmp_ids <- CAMfiles_post[[1]]$id[nchar(CAMfiles_post[[1]]$text) < 2 & CAMfiles_post[[1]]$text != "f"]
table(CAMfiles_post[[1]]$isActive[CAMfiles_post[[1]]$id %in% tmp_ids])

TRUE 
   4 
CAMfiles_post[[1]] <- CAMfiles_post[[1]][!CAMfiles_post[[1]]$id %in% tmp_ids,]


### draw CAMs
CAMdrawn_post <- draw_CAM(dat_merged = CAMfiles_post[[3]],
                     dat_nodes = CAMfiles_post[[1]],ids_CAMs = "all",
                     plot_CAM = FALSE,
                     useCoordinates = TRUE,
                     relvertexsize = 3,
                     reledgesize = 1)
processing 225 CAMs... 
[1] "== participantCAM in drawnCAM"
### network indicators
tmp_microIndicator <- c("Rettungsroboter", "sozialer Assistenzroboter", "Vorteile", "Nachteile")
networkIndicators_post <- compute_indicatorsCAM(drawn_CAM = CAMdrawn_post, 
                                           micro_degree = tmp_microIndicator, 
                                           micro_valence = tmp_microIndicator, 
                                           micro_centr_clo = tmp_microIndicator, 
                                           micro_transitivity = tmp_microIndicator, 
                                           largestClique = FALSE)


# wordlist
CAMwordlist_post <- create_wordlist(
  dat_nodes =  CAMfiles_post[[1]],
  dat_merged =  CAMfiles_post[[3]],
  order = "frequency",
  splitByValence = FALSE,
  comments = TRUE,
  raterSubsetWords = NULL,
  rater = FALSE
)
[1] "create_wordlist - use raw words"
[1] 0
[1] 3532
[1] "temporarily suffixes are added, because not all words have been summarized"
processing 225 CAMs... 
[1] "== participantCAM in drawnCAM"
if(all(nchar(CAMwordlist_post$Words) > 2)){
  print("successfully removed empty words")
}else{
  CAMwordlist_post$Words[nchar(CAMwordlist_post$Words) < 2]
}
[1] "f"
DT::datatable(CAMwordlist_post, options = list(pageLength = 5)) 

save CAMs as .json files, and as .png (igraph)

save_CAMs_as_pictures = FALSE

if(save_CAMs_as_pictures){
  setwd("outputs/01_dataPreperation")

  setwd("savedCAMs_post")
  setwd("png")
  ### remove all files if there are any
  if(length(list.files()) >= 1){
    file.remove(list.files())
    cat('\n! all former .png files have been deleted\n')
  }

  ### if no participant ID was provided replace by randomly generated CAM ID
  if(all(CAMfiles_post[[3]]$participantCAM.x == "noID")){
    CAMfiles_post[[3]]$participantCAM.x <- CAMfiles_post[[3]]$CAM.x
  }

  ### save as .json files, and as .png (igraph)
  ids_CAMs <- unique(CAMfiles_post[[3]]$participantCAM.x); length(ids_CAMs)

  for(i in 1:length(ids_CAMs)){
    save_graphic(filename = paste0("CAM", "_t2_", ids_CAMs[i])) #  paste0(ids_CAMs[i]))
    CAM_igraph <- CAMdrawn_post[[c(1:length(CAMdrawn_post))[
      names(CAMdrawn_post) == paste0(unique(CAMfiles_post[[3]]$participantCAM.x)[i])]]]
    plot(CAM_igraph, edge.arrow.size = .7,
         layout=layout_nicely, vertex.frame.color="black", asp = .5, margin = -0.1,
         vertex.size = 10, vertex.label.cex = .9)
    dev.off()
  }

  setwd("../json")
  ### remove all files if there are any
  if(length(list.files()) >= 1){
    file.remove(list.files())
    cat('\n! all former .json files have been deleted\n')
  }
  for(i in 1:length(raw_CAM_post)){
    if(!is_empty(raw_CAM_post[[i]]$nodes)){
      if(nrow(raw_CAM_post[[i]]$nodes) > 5){
        write(toJSON(raw_CAM_post[[i]], encoding = "UTF-8"),
              paste0(raw_CAM_post[[i]]$creator, ".json"))
      }
    }
  }
}

identify types of changes (delta CAM)

### set A, B, C, D types of change:
# A = concepts only deleted, B = concepts only added,
# C = identical concept sets, D = concepts both added and removed
# !!! i = 215
if (all(unique(CAMfiles_pre[[1]]$participantCAM) == unique(CAMfiles_post[[1]]$participantCAM))) {
  vec_type <- c()
  error <- 0
  verbose = FALSE
  
  ##
  list_newWords_text <- list()
  list_newWords_value <- list()
  list_ids <- list()
  h = 1
  for (i in 1:length(unique(CAMfiles_pre[[1]]$participantCAM))) {
    praeCAM <-
      CAMfiles_pre[[1]][CAMfiles_pre[[1]]$participantCAM == unique(CAMfiles_pre[[1]]$participantCAM)[i],]
    postCAM <-
      CAMfiles_post[[1]][CAMfiles_post[[1]]$participantCAM == unique(CAMfiles_post[[1]]$participantCAM)[i],]
    
    ## to test:
    # praeCAM$text %in% postCAM$text
    # postCAM$text %in% praeCAM$text
    # length(praeCAM$text)
    # length(postCAM$text)
    # praeCAM$text
    # postCAM$text
    
    ## Type A
    if (all(postCAM$text %in% praeCAM$text) &
        length(postCAM$text) < length(praeCAM$text)) {
      vec_type[i] <- "A"
      if (verbose) {
        cat("\n i:", i, "type:", vec_type[i], "\n")
      }
      error = error + 1
    }
    
    ## Type B
    if (all(praeCAM$text %in% postCAM$text) &
        length(postCAM$text) > length(praeCAM$text)) {
      vec_type[i] <- "B"
      if (verbose) {
        cat("\n i:", i, "type:", vec_type[i], "\n")
      }
      error = error + 1
      
      ## get words and values
      list_newWords_text[[h]] <-
        postCAM$text[!postCAM$text %in% praeCAM$text]
      list_newWords_value[[h]] <-
        postCAM$value[!postCAM$text %in% praeCAM$text]
      list_ids[[h]] <- postCAM$id[!postCAM$text %in% praeCAM$text]
      
      h = h + 1
    }
    
    ## Type C
    if (all(praeCAM$text %in% postCAM$text) &
        all(postCAM$text %in% praeCAM$text)) {
      vec_type[i] <- "C"
      if (verbose) {
        cat("\n i:", i, "type:", vec_type[i], "\n")
      }
      error = error + 1
    }
    
    ## Type D
    # partial overlap: neither pre ⊆ post nor post ⊆ pre
    if (sum(praeCAM$text %in% postCAM$text) < length(praeCAM$text) &
        sum(postCAM$text %in% praeCAM$text) < length(postCAM$text)) {
      vec_type[i] <- "D"
      if (verbose) {
        cat("\n i:", i, "type:", vec_type[i], "\n")
      }
      error = error + 1
    }
    
    if (error > 1) {
      print("ERROR in (not exclusive logical condition)", i)
      stop("check your data and adjust this function")
    }
    error = 0
  }
}
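A minimal worked example of the four change types (hypothetical concept vectors):

# pre <- c("Vorteile", "Nachteile", "Ethik")        # pre-CAM concepts
# c("Vorteile", "Nachteile")                        # type A: concepts only deleted
# c(pre, "KI")                                      # type B: concepts only added
# pre                                               # type C: identical concept sets
# c("Vorteile", "Nachteile", "KI")                  # type D: added and removed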


table(vec_type)
vec_type
  A   B   C   D 
  1 145  39  39 
barplot(table(unlist(list_newWords_value)))

# sort(table(unlist(list_newWords_text)))


### add data
nrow(questionnaire); length(vec_type)
[1] 225
[1] 225
questionnaire$typeChange <- vec_type 


dat_newWords <- data.frame(id = unlist(list_ids), 
                           text = unlist(list_newWords_text), 
                           value = unlist(list_newWords_value))
DT::datatable(dat_newWords, options = list(pageLength = 5))

combine CAM data and apply protocol CAM-App

Load CAM data

setwd("outputs/01_dataPreperation")
suppressMessages(read_file("CAMdata_combined.txt") %>%
  # ... split it into lines ...
  str_split('\n') %>% first() %>%
    discard(function(x) x == '') %>%
    discard(function(x) x == '\r') %>%
  # ... filter empty rows ...
  discard(function(x) x == '')) -> dat_CAM_combined

raw_CAM_combined <- list()
for(i in 1:length(dat_CAM_combined)){
  raw_CAM_combined[[i]] <- jsonlite::fromJSON(txt = dat_CAM_combined[[i]])
}

Create CAM files, draw CAMs and compute network indicators

consider_Protocol = TRUE



### create CAM single files (nodes, connectors, merged)
CAMfiles_combined <- create_CAMfiles(datCAM = raw_CAM_combined, reDeleted = TRUE)
Nodes and connectors, which were deleted by participants were removed. 
 # deleted nodes:  500 
 # deleted connectors:  176
# remove testing data sets
CAMfiles_combined[[1]] <- CAMfiles_combined[[1]][nchar(CAMfiles_combined[[1]]$participantCAM) == 24,]
CAMfiles_combined[[2]] <- CAMfiles_combined[[2]][nchar(CAMfiles_combined[[2]]$participantCAM) == 24,]
CAMfiles_combined[[3]] <- CAMfiles_combined[[3]][nchar(CAMfiles_combined[[3]]$participantCAM.x) == 24,]

# remove CAMs with many empty concepts
CAMfiles_combined[[1]] <- CAMfiles_combined[[1]][!CAMfiles_combined[[1]]$CAM %in% c("a0c6edeb-267a-4f27-8199-79f896e033ce", "8d74f576-e617-4eb1-8ccf-93589ce6c65b"),]
CAMfiles_combined[[2]] <- CAMfiles_combined[[2]][!CAMfiles_combined[[2]]$CAM %in% c("a0c6edeb-267a-4f27-8199-79f896e033ce", "8d74f576-e617-4eb1-8ccf-93589ce6c65b"),]
CAMfiles_combined[[3]] <- CAMfiles_combined[[3]][!CAMfiles_combined[[3]]$CAM.x %in% c("a0c6edeb-267a-4f27-8199-79f896e033ce", "8d74f576-e617-4eb1-8ccf-93589ce6c65b"),]


# remove empty concepts:
CAMfiles_combined[[1]]$text[nchar(CAMfiles_combined[[1]]$text) < 2  & CAMfiles_combined[[1]]$text != "f"]
 [1] "" "" "" "" "" "" "" "" "" "" ""
tmp_ids <- CAMfiles_combined[[1]]$id[nchar(CAMfiles_combined[[1]]$text) < 2 & CAMfiles_combined[[1]]$text != "f"]
table(CAMfiles_combined[[1]]$isActive[CAMfiles_combined[[1]]$id %in% tmp_ids])

TRUE 
  11 
CAMfiles_combined[[1]] <- CAMfiles_combined[[1]][!CAMfiles_combined[[1]]$id %in% tmp_ids,]


### add protocol
if(consider_Protocol){
  setwd("outputs/01_dataPreperation")

  text <- readLines("protocol_after_word2vec.txt", warn = FALSE)
  text <- readLines(textConnection(text, encoding="UTF-8"), encoding="UTF-8")

  if (testIfJson(file = text)) {
    protocol <- rjson::fromJSON(file = "protocol_after_word2vec.txt")

    ## no CAM deleted
    # CAMfiles_combined[[1]] <- CAMfiles_combined[[1]][CAMfiles_combined[[1]]$CAM %in% protocol$currentCAMs,]
    # CAMfiles_combined[[2]] <- CAMfiles_combined[[2]][CAMfiles_combined[[2]]$CAM %in% protocol$currentCAMs,]
    # CAMfiles_combined[[3]] <- CAMfiles_combined[[3]][CAMfiles_combined[[3]]$CAM.x %in% protocol$currentCAMs,]

    tmp_out <- overwriteTextNodes(protocolDat = protocol,
                                  nodesDat = CAMfiles_combined[[1]])
    CAMfiles_combined[[1]] <- tmp_out[[1]]
    # tmp_out[[2]]

  } else{
    print("Invalid protocol uploaded")
  }
}

time 2024-01-30 09:05:58.107549 at index 1 for approximate matching 
[... 246 similar progress lines omitted: "approximate matching" up to index 213, "word2vec" from index 214 ...]
time 2024-03-04 13:16:00.240809 at index 248 for word2vec 
vec_CAMs <- c(); h = 1
for(c in unique(CAMfiles_combined[[1]]$CAM)){
  tmp <- CAMfiles_combined[[1]][CAMfiles_combined[[1]]$CAM %in% c,]
  
  if(!(any(c("Rettungsroboter", "sozialer Assistenzroboter") %in% tmp$text) & all(c("Vorteile", "Nachteile") %in% tmp$text))){
    print(c)
    print(tmp$text)
    vec_CAMs[h] <- c
    h = h + 1
    # plot(CAMdrawn_combined[[c]])
  }
}
[1] "30435398-0a6f-4a21-a583-06a99ec8cca7"
 [1] "Vorteile"                          "Soziale Assistenzroboter"         
 [3] "Nachteile"                         "Potentieller Arbeitsplätzeverlust"
 [5] "Eher ein Systemproblem"            "Entlastet menschliche Arbeiter"   
 [7] "Reduzierung von Einsamkeit"        "Ersatz für Interkation"           
 [9] "Unterstützung beim Lernen"         "Immer Verfügbar"                  
[1] "20e51ba7-d47d-41c1-8d18-f809e08dde58"
 [1] "funktioniert in japan"          "achtung vor KI"                
 [3] "nur zur Unterstützung!!!!!"     "Menschlichkeit ist wichtig!!!!"
 [5] "Arbeitskräfte"                  "emotionale Inteligenz wichtig" 
 [7] "ohne Menschen nicht"            "Konzerne + Pflege "            
 [9] "NOGO bei Autismus"              "ScheissIDEE"                   
[1] "27245bf0-ef28-4d16-98f0-458b0f81ff3f"
 [1] "Roboter einsatz"         "Autonom"                
 [3] "Ferngesteuert"           "Ethik"                  
 [5] "Diskriminierung"         "Trainingsdaten Qualität"
 [7] "Kosten"                  "Kommunikationsfähigkeit"
 [9] "Quantiät"                "Lebensrisiko ersparnis" 
[1] "adcd6ad0-b2c1-417f-a487-78fc4609419f"
 [1] "Roboter"                  "Vorteile"                
 [3] "Nachteile"                "KI"                      
 [5] "Schlechte Programmierung" "Übermütig"               
 [7] "Ausnutzen"                "Teuer"                   
 [9] "Menschlichkeit lernen"    "Menschliche Natur"       
[11] "Mehr Rettungen"           "Technischer Fortschritt" 
[13] "Arbeitsentlasung"        
[1] "a1289b68-8242-49b2-8468-991c58d63c2d"
[1] "Paro-Roboter" "Therapie"     "Unterhaltung" "Emotion"      "Freundschaft"
[6] "Zeit"        
[1] "7d97cd7c-3b6a-4b25-80bc-17b2d3985261"
[1] "Flexiable"                 "SoftRoboter"              
[3] "Gerige Verletzungs Gefahr" "Versorgung"               
[5] "Weniger Stabil"            "Voreigenommen"            
[7] "Weniger präziesse"        
[1] "d14770f2-34b2-44ed-af93-df0fbf6101ea"
[1] "soft rettungsroboter"         "nachteile"                   
[3] "vorteile"                     "sehr Teuer"                  
[5] "Ethische Dilemma"             "Kamera "                     
[7] "Adaptierbar"                  "nahrungsgabe in gefährliche "
[1] "4d44c9a9-782f-4727-8841-bc24eaa88048"
 [1] "Vorteile"                          "Soziale Assistenzroboter"         
 [3] "Nachteile"                         "Potentieller Arbeitsplätzeverlust"
 [5] "Eher ein Systemproblem"            "Entlastet menschliche Arbeiter"   
 [7] "Reduzierung von Einsamkeit"        "Ersatz für Interkation"           
 [9] "Unterstützung beim Lernen"         "Immer Verfügbar"                  
[11] "Emotionale Abhängigkeit"          
[1] "1c1f90f1-9a80-4f78-bc44-536b916acd0d"
 [1] "funktioniert in japan"          "achtung vor KI"                
 [3] "nur zur Unterstützung!!!!!"     "Menschlichkeit ist wichtig!!!!"
 [5] "Arbeitskräfte"                  "emotionale Inteligenz wichtig" 
 [7] "ohne Menschen nicht"            "Konzerne + Pflege "            
 [9] "NOGO bei Autismus"              "ScheissIDEE"                   
[1] "144ed50a-cf99-4ea2-b72a-53b5e0c27fa8"
 [1] "Roboter einsatz"         "Autonom"                
 [3] "Ferngesteuert"           "Ethik"                  
 [5] "Diskriminierung"         "Trainingsdaten Qualität"
 [7] "Kosten"                  "Kommunikationsfähigkeit"
 [9] "Quantiät"                "Lebensrisiko ersparnis" 
[11] "Anpassungsfähigkeit"    
[1] "5ce171ad-cd03-4ec2-b49c-ba0c90e6c9f4"
 [1] "Roboter"                  "Vorteile"                
 [3] "Nachteile"                "KI"                      
 [5] "Schlechte Programmierung" "Übermütig"               
 [7] "Ausnutzen"                "Teuer"                   
 [9] "Menschlichkeit lernen"    "Menschliche Natur"       
[11] "Mehr Rettungen"           "Technischer Fortschritt" 
[13] "Arbeitsentlasung"        
## fix manually
# single predefined concepts were written incorrectly
CAMfiles_combined[[1]]$text[CAMfiles_combined[[1]]$participantCAM %in% "5debfbcc3a11682f0fae8b29" & CAMfiles_combined[[1]]$text == "Soziale Assistenzroboter"] <- "sozialer Assistenzroboter"
CAMfiles_combined[[1]]$text_summarized[CAMfiles_combined[[1]]$participantCAM %in% "5debfbcc3a11682f0fae8b29" & CAMfiles_combined[[1]]$text == "sozialer Assistenzroboter"] <- "sozialer Assistenzroboter_positive"
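# (the second assignment in each pair deliberately matches the already-renamed text)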


CAMfiles_combined[[1]]$text[CAMfiles_combined[[1]]$participantCAM %in% "5ba00acff337030001de805d" & CAMfiles_combined[[1]]$text == "Roboter"] <- "Rettungsroboter"
CAMfiles_combined[[1]]$text_summarized[CAMfiles_combined[[1]]$participantCAM %in% "5ba00acff337030001de805d" & CAMfiles_combined[[1]]$text == "Rettungsroboter"] <- "Rettungsroboter_neutral"              


## remove 7 persons
vec_Pids <- unique(CAMfiles_combined[[1]]$participantCAM[CAMfiles_combined[[1]]$CAM %in% vec_CAMs])
vec_Pids
[1] "5debfbcc3a11682f0fae8b29" "5e5832478c2b6f03f9acd234"
[3] "65522df91d0bd93a77e6756a" "5ba00acff337030001de805d"
[5] "65539049fb7f10447116344b" "633ddca746fbf59ca2e530aa"
[7] "651d789e7d36a58fab803b09"
# remove CAMs
CAMfiles_combined[[1]] <- CAMfiles_combined[[1]][!CAMfiles_combined[[1]]$participantCAM %in% vec_Pids,]
CAMfiles_combined[[2]] <- CAMfiles_combined[[2]][!CAMfiles_combined[[2]]$participantCAM %in% vec_Pids,]
CAMfiles_combined[[3]] <- CAMfiles_combined[[3]][!CAMfiles_combined[[3]]$participantCAM.x %in% vec_Pids,]

# remove questionnaires
questionnaire <- questionnaire[!questionnaire$PROLIFIC_PID %in% vec_Pids,]

###
for(c in unique(CAMfiles_combined[[1]]$participantCAM)){
  tmp <- CAMfiles_combined[[1]][CAMfiles_combined[[1]]$participantCAM %in% c,]
  
  if(sum(tmp$text %in% c("Vorteile", "Nachteile")) != 4){
    print(c)
    print(sum(tmp$text %in% c("Vorteile", "Nachteile")))
    print(tmp$text)
  }
}
[1] "644ac0e41a2fabcf5d563f20"
[1] 6
 [1] "Rettungsroboter"                  "Nachteile"                       
 [3] "Vorteile"                         "Missbrauch von Robotern "        
 [5] "Fehlende Autonomie"               "Besserer Zugang "                
 [7] "Genaue Handhabung"                "Autonome Möglichkeiten"          
 [9] "Zuverlässligkeit"                 "Ausfall Von Robotern"            
[11] "Fehlfuktionen"                    "Einsatz bei Sucheinsätzen"       
[13] "Ethik"                            "Umgang mit verletzen"            
[15] "Keine Erstversorgung"             "Rettungsroboter"                 
[17] "Nachteile"                        "Vorteile"                        
[19] "Missbrauch von Robotern "         "Fehlende Autonomie"              
[21] "Besserer Zugang "                 "Genaue Handhabung"               
[23] "Autonome Möglichkeiten"           "Zuverlässligkeit"                
[25] "Ausfall Von Robotern"             "Fehlfuktionen"                   
[27] "Einsatz bei Sucheinsätzen"        "Ethik"                           
[29] "Umgang mit verletzen"             "Keine Erstversorgung"            
[31] "Softer Roboter"                   "Nachteile"                       
[33] "Vorteile"                         "Inspiriert von Organismen"       
[35] "Autonom + Manuell"                "Natürliche Bewegungen"           
[37] "Lieferung von  Versorgungsgütern" "Minderung des Verletzungsrisikos"
[39] "Weniger Präzisition"              "Aktuell noch ferngesteuert"      
[41] "Beschränkte autonomie"           
[1] "65410ad762a7e3c570b6273c"
[1] 8
 [1] "sozialer Assistenzroboter"               
 [2] "Nachteile"                               
 [3] "Vorteile"                                
 [4] "Ziel"                                    
 [5] "Anwendungsbereiche"                      
 [6] "Unterstützung"                           
 [7] "-Therapie"                               
 [8] "-Altenpflege"                            
 [9] "-Bildung"                                
[10] "-Soziale Begleitung"                     
[11] "Vorteile"                                
[12] "Gesellschaft für Einsame"                
[13] "Reduktion von Isolation "                
[14] "Nachteile"                               
[15] "Verlust der Fähigkeit "                  
[16] "Verringerung der Eigeninitiative"        
[17] "Verbesserung der Emotionserkennung"      
[18] "Verbesserung Kommunikationsfähigkeit"    
[19] "Stärkung sozialer Fähigkeiten"           
[20] "Motivation und Förderung"                
[21] "Ersatz menschlicher Arbeitskräfte"       
[22] "Sozialer Wandel"                         
[23] "Beeinflussung menschlicher Interaktionen"
[24] "Verlust der Menschlichkeit"              
[25] "sozialer Assistenzroboter"               
[26] "Nachteile"                               
[27] "Vorteile"                                
[28] "Ziel"                                    
[29] "Anwendungsbereiche"                      
[30] "Unterstützung"                           
[31] "-Therapie"                               
[32] "-Altenpflege"                            
[33] "-Bildung"                                
[34] "-Soziale Begleitung"                     
[35] "Vorteile"                                
[36] "Gesellschaft für Einsame"                
[37] "Reduktion von Isolation "                
[38] "Nachteile"                               
[39] "Verlust der Fähigkeit "                  
[40] "Verringerung der Eigeninitiative"        
[41] "Verbesserung der Emotionserkennung"      
[42] "Verbesserung Kommunikationsfähigkeit"    
[43] "Stärkung sozialer Fähigkeiten"           
[44] "Motivation und Förderung"                
[45] "Ersatz menschlicher Arbeitskräfte"       
[46] "Sozialer Wandel"                         
[47] "Beeinflussung menschlicher Interaktionen"
[48] "Verlust der Menschlichkeit"              
[1] "5dea808cce8d8d19f5424b21"
[1] 6
 [1] "sozialer Assistenzroboter"      "Nachteile"                     
 [3] "Vorteile"                       "Arbeitsmarkt"                  
 [5] "Roboter füllen Leerstellen"     "Kein menschlicher Bezug"       
 [7] "Funktionen von Robotern"        "Ethische Probleme"             
 [9] "Technische Probleme"            "Günstiger als Menschen"        
[11] "24/7 Verfügbarkeit"             "Repetitiven Aufgaben"          
[13] "Überwachung von Vitalzeichen"   "Körperlich belastende Arbeiten"
[15] "Keine Empathie"                 "mögliche Datenschutz Probleme" 
[17] "Abhängigkeit von Robotern"      "zukünftig unausweichlich"      
[19] "unbesetzte Stellen"             "sozialer Assistenzroboter"     
[21] "Nachteile"                      "Vorteile"                      
[23] "Arbeitsmarkt"                   "Roboter füllen Leerstellen"    
[25] "Kein menschlicher Bezug"        "Funktionen von Robotern"       
[27] "Ethische Probleme"              "Technische Probleme"           
[29] "Günstiger als Menschen"         "24/7 Verfügbarkeit"            
[31] "Repetitiven Aufgaben"           "Überwachung von Vitalzeichen"  
[33] "Körperlich belastende Arbeiten" "Keine Empathie"                
[35] "mögliche Datenschutz Probleme"  "Abhängigkeit von Robotern"     
[37] "zukünftig unausweichlich"       "unbesetzte Stellen"            
[39] "softroboter"                    "Nachteile"                     
[41] "Vorteile"                       "geringeres Verletzungsrisiko"  
[43] "emotionale Therapieansetze "    "emotionsersatz"                
[45] "Nur Fake-Empathie"             
[1] "6560e6f734ae18bd18474cc9"
[1] 28
 [1] "Rettungsroboter" "Nachteile"       "Vorteile"        "Vorteile"       
 [5] "Nachteile"       "Vorteile"        "Vorteile"        "Nachteile"      
 [9] "Vorteile"        "Nachteile"       "Rettungsroboter" "Vorteile"       
[13] "Vorteile"        "Vorteile"        "Nachteile"       "Vorteile"       
[17] "Rettungsroboter" "Nachteile"       "Vorteile"        "Vorteile"       
[21] "Nachteile"       "Vorteile"        "Vorteile"        "Nachteile"      
[25] "Vorteile"        "Nachteile"       "Rettungsroboter" "Vorteile"       
[29] "Vorteile"        "Vorteile"        "Nachteile"       "Vorteile"       
# remove person "6560e6f734ae18bd18474cc9" -> only drew predefined concepts
CAMfiles_combined[[1]] <- CAMfiles_combined[[1]][!CAMfiles_combined[[1]]$participantCAM %in% "6560e6f734ae18bd18474cc9",]
CAMfiles_combined[[2]] <- CAMfiles_combined[[2]][!CAMfiles_combined[[2]]$participantCAM %in% "6560e6f734ae18bd18474cc9",]
CAMfiles_combined[[3]] <- CAMfiles_combined[[3]][!CAMfiles_combined[[3]]$participantCAM.x %in% "6560e6f734ae18bd18474cc9",]

# remove questionnaires
questionnaire <- questionnaire[!questionnaire$PROLIFIC_PID %in% "6560e6f734ae18bd18474cc9",]


# remove person "65304e8a630196510c79f7df" -> drew the concept "leer" ("empty") multiple times
for(c in unique(CAMfiles_combined[[1]]$CAM)){
  tmp <-  CAMfiles_combined[[1]][CAMfiles_combined[[1]]$CAM == c,]
  
  if(any(table(tmp$text) >= 3)){
    print(c)
    print(sort(table(tmp$text)))
  }
}
[1] "503b3517-b003-48e5-b121-f48c9a64ecb6"

   Arbeitserleichterung         erweiterungsfähig     Gefahr für Menschen  
                       1                        1                        1 
            Kostenfaktor                Nachteile Opfer rechtzeitig finden 
                       1                        1                        1 
         Rettungsroboter                  Stabill                 Vorteile 
                       1                        1                        1 
                    leer 
                       6 
[1] "39e7d213-1276-4da8-99ea-5a13487874e7"

          Arbeitserleichterung                erweiterungsfähig 
                              1                               1 
           Gefahr für Menschen                     Kostenfaktor 
                              1                               1 
Lieferung von Versorgungsgüter                        Nachteile 
                              1                               1 
       Opfer rechtzeitig finden                 Rettungsroboter 
                              1                               1 
                        Stabill                        Vorteile 
                              1                               1 
                           leer 
                              5 
tmp <-  CAMfiles_combined[[1]][CAMfiles_combined[[1]]$CAM %in% c("503b3517-b003-48e5-b121-f48c9a64ecb6", "39e7d213-1276-4da8-99ea-5a13487874e7"),]
unique(tmp$participantCAM)
[1] "65304e8a630196510c79f7df"
CAMfiles_combined[[1]] <- CAMfiles_combined[[1]][!CAMfiles_combined[[1]]$participantCAM %in% "65304e8a630196510c79f7df",]
CAMfiles_combined[[2]] <- CAMfiles_combined[[2]][!CAMfiles_combined[[2]]$participantCAM %in% "65304e8a630196510c79f7df",]
CAMfiles_combined[[3]] <- CAMfiles_combined[[3]][!CAMfiles_combined[[3]]$participantCAM.x %in% "65304e8a630196510c79f7df",]

# remove questionnaires
questionnaire <- questionnaire[!questionnaire$PROLIFIC_PID %in% "65304e8a630196510c79f7df",]



### draw CAMs
CAMdrawn_combined <- draw_CAM(dat_merged = CAMfiles_combined[[3]],
                     dat_nodes = CAMfiles_combined[[1]],ids_CAMs = "all",
                     plot_CAM = FALSE,
                     useCoordinates = TRUE,
                     relvertexsize = 3,
                     reledgesize = 1)
processing 432 CAMs... 
[1] "== ids_CAMs in drawnCAM"
### network indicators
tmp_microIndicator <- c("Rettungsroboter", "sozialer Assistenzroboter", "Vorteile", "Nachteile")
networkIndicators_combined <- compute_indicatorsCAM(drawn_CAM = CAMdrawn_combined, 
                                           micro_degree = tmp_microIndicator, 
                                           micro_valence = tmp_microIndicator, 
                                           micro_centr_clo = tmp_microIndicator, 
                                           micro_transitivity = tmp_microIndicator, 
                                           largestClique = FALSE)


# wordlist
CAMwordlist_combined <- create_wordlist(
  dat_nodes =  CAMfiles_combined[[1]],
  dat_merged =  CAMfiles_combined[[3]],
  order = "frequency",
  splitByValence = FALSE,
  comments = TRUE,
  raterSubsetWords = NULL,
  rater = FALSE
)
[1] "create_wordlist - use summarized words"
[1] 2485
[1] 6404
[1] "temporarily suffixes are added, because not all words have been summarized"
processing 432 CAMs... 
[1] "== ids_CAMs in drawnCAM"
if(all(nchar(CAMwordlist_combined$Words) >= 2)){
  print("successfully removed empty words")
}else{
  CAMwordlist_combined$Words[nchar(CAMwordlist_combined$Words) < 2]
}
[1] "f"
DT::datatable(CAMwordlist_combined, options = list(pageLength = 5)) 

get wordlists for raters

setwd("outputs/01_dataPreperation/wordlists_raters")


nrow(CAMwordlist_combined) / 7
[1] 306.4286
# split plan: 309 + 6*306 = 2145


vec_words <- 1:nrow(CAMwordlist_combined) 
vec_names <- c("Louisa", "Julius", "Christophe", "Irina", "Michael", "Paul", "Lars")

nrow(CAMwordlist_combined)  / length(vec_names)
[1] 306.4286
for(i in 1:7){
  print(length(vec_words))
  # the first rater receives 309 words, the remaining six 306 each
  tmp <- sample(x = vec_words, size = ifelse(i == 1, 309, 306), replace = FALSE)
  tmp_out <- CAMwordlist_combined[tmp,]
  write.xlsx2(x = cbind(Superordinate = NA, Comment = NA, tmp_out), file = paste0("ratings_", vec_names[i], ".xlsx"), row.names = FALSE)
  vec_words <- vec_words[!vec_words %in% tmp]
}
[1] 2145
[1] 1836
[1] 1530
[1] 1224
[1] 918
[1] 612
[1] 306
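Note that sample() is used without a fixed seed, so re-running this chunk assigns different words to each rater; seeding the RNG before the loop would make the split reproducible, e.g.:

# optional: fix the RNG state before the sampling loop (the seed value is arbitrary)
set.seed(42)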

clean up CAM data

identify words with high variance in valence

setwd("data_overwrite")
dat_overwrite <- xlsx::read.xlsx2(file = "ratings_combined_final.xlsx", sheetIndex = 1)
dat_overwrite$mean_valence <- as.numeric(dat_overwrite$mean_valence)

tmp_splitWords <- dat_overwrite$splitWords # keep the original splitWords ratings before they are reset below
# dat_overwrite$Words <- str_trim(string = dat_overwrite$Words, side = "both")
# table(dat_overwrite$Words)[table(dat_overwrite$Words) >= 2]


tmp_nodes <- CAMfiles_combined[[1]]
tmp_nodes$text_summarized <- str_remove(string = tmp_nodes$text_summarized, pattern = "_positive$|_negative$|_neutral$|_ambivalent$")
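As a quick illustration, the pattern strips the valence suffix that was appended during summarization (the input word is hypothetical):

str_remove(string = "Kosten_negative", pattern = "_positive$|_negative$|_neutral$|_ambivalent$")
[1] "Kosten"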


dat_overwrite$splitWords <- 0

## loop through single words to identify possible split words:
for(i in 1:nrow(dat_overwrite)){
  if(dat_overwrite$sd_valence[i] >= 1){
    
    tmp <- tmp_nodes$value[tmp_nodes$text_summarized == dat_overwrite$Words[i]]
    
    tmp <- tmp[tmp != 10]
    tmp <- tmp[tmp != 0]

    if(!is.na(sd(tmp)) & any(tmp >= 1) & any(tmp <= -1)){
      cat("\n for word: \"", dat_overwrite$Words[i], "\" there is a high SD with \"", sd(tmp), "\"\n", 
          sort(tmp), "\n")
      
      dat_overwrite$splitWords[i] <- 1
    }
  }
}

 for word: " Einsamkeit " there is a high SD with " 2.73252 "
 -3 -3 1 1 3 3 

 for word: " Softroboter " there is a high SD with " 1.159502 "
 -1 1 1 2 2 2 2 2 3 3 

 for word: " Präzision " there is a high SD with " 1.686887 "
 -2 -1 -1 1 1 1 1 2 3 3 3 3 3 3 3 3 3 

 for word: " Ethik/Moral " there is a high SD with " 1.563472 "
 -3 -3 -3 -3 -3 -2 -2 -2 -1 2 

 for word: " Hacking " there is a high SD with " 1.505545 "
 -3 -3 -2 -2 -1 1 

 for word: " Kosten " there is a high SD with " 1.55348 "
 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1 1 1 1 2 2 3 3 3 3 

 for word: " Datenschutz " there is a high SD with " 1.117442 "
 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -2 -2 -2 -2 -2 -2 -2 -2 -1 -1 1 1 

 for word: " Autonom/Autonomie " there is a high SD with " 2.043961 "
 -3 -3 -2 -2 -2 -2 1 1 2 2 

 for word: " Fehler " there is a high SD with " 1.47093 "
 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 2 2 3 3 

 for word: " Programmierung " there is a high SD with " 1.380131 "
 -3 -3 -1 -1 -1 -1 1 

 for word: " Genauigkeit " there is a high SD with " 2.9277 "
 -3 -3 -1 3 3 3 3 

 for word: " Interaktion/Kommunikation  " there is a high SD with " 2.020726 "
 -3 -3 1 1 1 2 2 2 2 2 3 3 

 for word: " Felxibilität " there is a high SD with " 1.414214 "
 -2 1 1 2 2 2 2 3 3 3 3 3 3 

 for word: " Verfügbarkeit " there is a high SD with " 2.211083 "
 -2 -2 -1 -1 2 2 3 3 3 3 

 for word: " menschliche Komponente " there is a high SD with " 3.464102 "
 -3 -3 3 

 for word: " Gefahr (Waffe/Attacken) " there is a high SD with " 2.366432 "
 -3 -3 -3 -3 -2 -2 -2 -2 3 3 

 for word: " Anschaffung " there is a high SD with " 2.309401 "
 -3 -3 1 1 

 for word: " hohe Kosten " there is a high SD with " 1.486447 "
 -3 -3 -2 -2 -2 -2 -2 -2 -1 -1 -1 -1 -1 2 2 

 for word: " Emotionslos " there is a high SD with " 2.347576 "
 -2 -2 -2 -2 -2 -2 2 2 3 3 

 for word: " Menschlichkeit " there is a high SD with " 2.886751 "
 -3 -3 2 

 for word: " Verhalten " there is a high SD with " 2.12132 "
 -2 1 

 for word: " Sicherheit " there is a high SD with " 2.160247 "
 -3 -3 -2 -2 -2 -2 -2 -2 1 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 

 for word: " unzugängliche Bereiche " there is a high SD with " 2.708013 "
 -3 2 2 3 

 for word: " komplexe Probleme " there is a high SD with " 2.12132 "
 -2 1 

 for word: " Entlastung Personal " there is a high SD with " 2.886751 "
 -2 -2 3 3 

 for word: " Zuverlässigkeit " there is a high SD with " 1.334523 "
 -1 -1 -1 1 1 1 1 2 2 2 2 2 2 3 3 

 for word: " Entwicklung " there is a high SD with " 2.73252 "
 -3 -3 1 1 3 3 

 for word: " Nachteile " there is a high SD with " 0.9407678 "
 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1 1 

 for word: " Emotionale Bindung " there is a high SD with " 1.732051 "
 -1 2 2 

 for word: " teure Sensore " there is a high SD with " 1.414214 "
 -1 1 
table(dat_overwrite$splitWords)

   0    1 
2123   30 
# additionally flag words that were coded "2" in the original ratings file
for(i in 1:length(tmp_splitWords)){
  if(tmp_splitWords[i] == "2"){
    dat_overwrite$splitWords[i] <- 1
  }
}

table(dat_overwrite$splitWords)

   0    1 
2121   32 
xlsx::write.xlsx2(x = dat_overwrite, file = "ratings_combined_final_splitWords.xlsx")
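For reference, the split criterion applied in the loop above can be restated as a standalone predicate; a hedged sketch (straddlesZero is a hypothetical helper and omits the sd_valence >= 1 pre-filter used in the loop):

# hypothetical restatement of the split criterion for a single summarized word
straddlesZero <- function(word){
  v <- tmp_nodes$value[tmp_nodes$text_summarized == word]
  v <- v[v != 10 & v != 0] # drop ambivalent (10) and neutral (0) ratings
  !is.na(sd(v)) && any(v >= 1) && any(v <= -1)
}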

overwrite single words

setwd("data_overwrite")
dat_overwrite_adjusted <- xlsx::read.xlsx2(file = "ratings_combined_final_splitWords_adjusted.xlsx", sheetIndex = 1)


## fix single word
tmp_nodes$text_summarized[tmp_nodes$text_summarized == "-Soziale Begleitung"] <- "Soziale Begleitung"



## overwrite concepts
for(i in 1:nrow(dat_overwrite)){
  if(!is.na(dat_overwrite$Superordinate[i])){
    tmp <- tmp_nodes[tmp_nodes$text_summarized == dat_overwrite$Words[i], ]
    
    for(j in 1:nrow(tmp)){
      if(sum(tmp_nodes$id %in% tmp[j,]$id) != 1){
        # print("single CAM removed")
        # print(i)
        break
      }else{
        # add suffix
        tmp_superordinate <- str_trim(string = dat_overwrite$Superordinate[i], side = "both")
        if(tmp[j,]$value < 0){
          if(dat_overwrite_adjusted$splitWords[i] == "0"){
            tmp[j,]$text_summarized <- paste0(tmp_superordinate, "_negative")
          }else{
            tmp[j,]$text_summarized <- paste0(dat_overwrite_adjusted$rep_neg[i], "_negative")
          }
        }else if(tmp[j,]$value == 0){
          if(dat_overwrite_adjusted$splitWords[i] == "0"){
            tmp[j,]$text_summarized <- paste0(tmp_superordinate, "_neutral")
          }else{
            tmp[j,]$text_summarized <- paste0(dat_overwrite_adjusted$rep_neut[i], "_neutral")
          }
        }else if(tmp[j,]$value == 10){
          if(dat_overwrite_adjusted$splitWords[i] == "0"){
            tmp[j,]$text_summarized <- paste0(tmp_superordinate, "_ambivalent")
          }else{
            tmp[j,]$text_summarized <- paste0(dat_overwrite_adjusted$rep_amb[i], "_ambivalent")
          }
        }else{
          if(dat_overwrite_adjusted$splitWords[i] == "0"){
            tmp[j,]$text_summarized <- paste0(tmp_superordinate, "_positive")
          }else{
            tmp[j,]$text_summarized <- paste0(dat_overwrite_adjusted$rep_pos[i], "_positive")
          }
        }
      }   
    }        
    
    tmp_nodes$text_summarized[tmp_nodes$text_summarized == dat_overwrite$Words[i]] <- tmp$text_summarized
  }
}
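The valence-to-suffix rule applied inside the loop can be summarized compactly; a minimal sketch (valenceSuffix is a hypothetical name):

# hypothetical summary of the suffix rule: < 0 negative, == 0 neutral,
# == 10 ambivalent (special code), otherwise positive
valenceSuffix <- function(value){
  if(value < 0){
    "_negative"
  }else if(value == 0){
    "_neutral"
  }else if(value == 10){
    "_ambivalent"
  }else{
    "_positive"
  }
}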



## manually adjust wrongly summarized categories
tmp_nodes$text[tmp_nodes$text_summarized == "ADJUSTME_ambivalent"]
[1] "liebe" "liebe"
tmp_nodes$text_summarized[tmp_nodes$text_summarized == "ADJUSTME_ambivalent"] <- "RCA_ambivalent"

tmp_nodes$text[tmp_nodes$text_summarized == "ADJUSTME_neutral"]
[1] "Leben"  "Fehler" "Leben"  "Fehler"
tmp_nodes$text_summarized[tmp_nodes$text_summarized == "ADJUSTME_neutral"] <- "RCN_neutral"

tmp_nodes$text[tmp_nodes$text_summarized == "ADJUSTME_positive"]
[1] "Leise"  "Lernen" "Leise"  "Lernen"
tmp_nodes$text_summarized[tmp_nodes$text_summarized == "ADJUSTME_positive"] <- "TP_positive"

tmp_nodes$text[tmp_nodes$text_summarized == "ADJUSTME_negative"]
 [1] "Teuer"   "Teuer"   "Teuer"   "Teuer"   "teuer"   "teuer"   "Ausfall"
 [8] "Teuer"   "Teuer"   "Teuer"   "Fehlern" "Teuer"   "Preis"   "teurer" 
[15] "teuer"   "Teuer"   "teuer"   "Teuer"   "teuer"   "Teuer"   "Teuer"  
[22] "Teuer"   "Teuer"   "Teuer"   "teuer"   "teuer"   "Ausfall" "Teuer"  
[29] "Teuer"   "Teuer"   "Fehlern" "Teuer"   "Preis"   "teurer"  "teuer"  
[36] "Teuer"   "Fehler"  "teuer"   "Teuer"   "teuer"   "Teuer"  
tmp_nodes$text_summarized[tmp_nodes$text_summarized == "ADJUSTME_negative" & str_detect(string = tmp_nodes$text, pattern = "teuer|Teuer|Preis|teurer")] <- "HC_negative"

tmp_nodes$text[tmp_nodes$text_summarized == "ADJUSTME_negative"]
[1] "Ausfall" "Fehlern" "Ausfall" "Fehlern" "Fehler" 
tmp_nodes$text_summarized[tmp_nodes$text_summarized == "ADJUSTME_negative"] <- "TL_negative"
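A quick sanity check could confirm that no placeholder categories remain after these manual fixes (assuming all placeholders carry the prefix "ADJUSTME"):

# stops with an error if any placeholder category is left
stopifnot(!any(str_detect(string = tmp_nodes$text_summarized, pattern = "^ADJUSTME")))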



############## check frequencies of the summarized categories
table(str_remove(string = tmp_nodes$text_summarized, pattern = "_positive$|_negative$|_neutral$|_ambivalent$"))

                      AN                       AP                       HC 
                     176                       96                      189 
                    HRIN                     HRIP                       LC 
                     229                      222                      107 
                      MT                Nachteile                        R 
                     447                      436                      244 
                     RCA                      RCN                     RCPN 
                     195                      248                      169 
                    RCPP          Rettungsroboter                       SA 
                     252                      234                      643 
                     SIN                      SIP Soziale Assistenzroboter 
                     401                      404                      198 
                       T                       TL                       TP 
                     209                      265                      604 
                Vorteile 
                     436 

Fix cases in which participants themselves wrote one of the predefined concepts (Nachteile, Vorteile):

### check multiples of Vorteile Nachteile
CAMfiles_combined[[1]] <- tmp_nodes


for(c in unique(CAMfiles_combined[[1]]$participantCAM)){
  tmp <- CAMfiles_combined[[1]][CAMfiles_combined[[1]]$participantCAM %in% c,]

  if(sum(tmp$text %in% c("Vorteile", "Nachteile")) != 4){
    print(c)
    print(sum(tmp$text %in% c("Vorteile", "Nachteile")))
    
    for(j in unique(tmp$CAM)){
      plot(CAMdrawn_combined[[j]])
    }
    
    # recode self-written duplicates of the predefined concepts by their text prefix
    CAMfiles_combined[[1]]$text_summarized[CAMfiles_combined[[1]]$participantCAM %in% c & !CAMfiles_combined[[1]]$predefinedConcept & str_detect(string = CAMfiles_combined[[1]]$text, pattern = "^Nachteile")] <- "RCPN_negative"
    CAMfiles_combined[[1]]$text_summarized[CAMfiles_combined[[1]]$participantCAM %in% c & !CAMfiles_combined[[1]]$predefinedConcept & str_detect(string = CAMfiles_combined[[1]]$text, pattern = "^Vorteile")] <- "RCPP_positive"
  }
}
[1] "644ac0e41a2fabcf5d563f20"
[1] 6

[1] "65410ad762a7e3c570b6273c"
[1] 8

[1] "5dea808cce8d8d19f5424b21"
[1] 6
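After the in-loop recoding, a hedged follow-up check could confirm that the self-written duplicates now carry the RCPN/RCPP categories:

# hedged check: self-written "Vorteile"/"Nachteile" should be recoded by now
tmp_check <- CAMfiles_combined[[1]]
table(tmp_check$text_summarized[tmp_check$text %in% c("Vorteile", "Nachteile") & !tmp_check$predefinedConcept])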

merge and save all data

setwd("outputs/01_dataPreperation/final")


### remove all previously removed participants
length(unique(CAMfiles_combined[[1]]$participantCAM))
[1] 216
nrow(networkIndicators_pre)
[1] 225
nrow(networkIndicators_post)
[1] 225
nrow(questionnaire)
[1] 216
networkIndicators_pre <-
  networkIndicators_pre[networkIndicators_pre$participantCAM %in% CAMfiles_combined[[1]]$participantCAM,]
networkIndicators_post <-
  networkIndicators_post[networkIndicators_post$participantCAM %in% CAMfiles_combined[[1]]$participantCAM,]

### match data
if (all(questionnaire$PROLIFIC_PID == networkIndicators_pre$participantCAM) &
    all(networkIndicators_pre$participantCAM == networkIndicators_post$participantCAM) & 
    all(unique(CAMfiles_combined[[1]]$participantCAM) == networkIndicators_post$participantCAM)) {
  print("all data can be matched row by row")
  
  # save questionnaire
  questionnaire$CAMpre <- networkIndicators_pre$CAM_ID
  questionnaire$CAMpost <- networkIndicators_post$CAM_ID

  ## save as .xlsx file
  xlsx::write.xlsx2(x = questionnaire, file = "questionnaire_final.xlsx")
  ## save as R object
  saveRDS(questionnaire, file = "questionnaire_final.rds")
  
  
  # save network indicators pre
  ## save as .xlsx file
  xlsx::write.xlsx2(x = networkIndicators_pre, file = "networkIndicators_pre_final.xlsx")
  ## save as R object
  saveRDS(networkIndicators_pre, file = "networkIndicators_pre_final.rds")
  
  # save network indicators post
  ## save as .xlsx file
  xlsx::write.xlsx2(x = networkIndicators_post, file = "networkIndicators_post_final.xlsx")
  ## save as R object
  saveRDS(networkIndicators_post, file = "networkIndicators_post_final.rds")
  
  
  # save CAMfiles pre
  saveRDS(CAMfiles_pre, file = "CAMfiles_pre_final.rds")
  
  # save CAMfiles post
  saveRDS(CAMfiles_post, file = "CAMfiles_post_final.rds")
  
  # save CAMfiles combined and clean
  saveRDS(CAMfiles_combined, file = "CAMfiles_combined_final.rds")
  
  
  # save questionnaire combined with CAMs
  colnames(networkIndicators_pre) <- paste0(colnames(networkIndicators_pre), "_pre")
  colnames(networkIndicators_post) <- paste0(colnames(networkIndicators_post), "_post")

  questionnaireCAMs <- cbind(questionnaire, networkIndicators_pre, networkIndicators_post)

  ## save as .xlsx file
  xlsx::write.xlsx2(x = questionnaireCAMs, file = "questionnaireCAMs_final.xlsx")
  ## save as R object
  saveRDS(questionnaireCAMs, file = "questionnaireCAMs_final.rds")
}
[1] "all data can be matched row by row"