Data Preparation - delta CAMs

Author

Julius Fenn

Published

July 24, 2025

Background Information

This is an R Markdown document. Instructions for writing these documents and background information can be found in the book written by Xie, Allaire, and Grolemund (2018). When you execute code within the document, the results appear beneath the code. This file contains summary statistics and the analysis step (confirmatory and exploratory analyses). The project is split into multiple subfiles, such as data processing and data analysis steps, which follows the classical data-analysis pipeline (see Peng and Matsui 2016; Wickham and Grolemund 2017).

Setup

Global variables

# global flag, set to FALSE here (presumably controls whether CAMs are
# exported as pictures - TODO confirm where it is read)
save_CAMs_as_pictures <- FALSE
# consider_Protocol = FALSE # not needed at current stage

Load packages

require(pacman)

Lade nötiges Paket: pacman

p_load('tidyverse', 'jsonlite', 'magrittr',
       'stargazer', 'psych', 'jtools', 'DT', 'igraph',
       'writexl', 'Cairo')

Check your raw data:

### list data files
setwd("data")
folders <- list.files(pattern = "^study_result.*")

# Sanity check of the raw data: for every study-result folder that contains
# exactly five components, print the participant ID, the recorded screens
# ("sender") and basic information about both CAMs.
# Components are addressed by their position in dir(), so this relies on the
# listing order of the component folders matching the order of the study.
# seq_along() turns the loop into a no-op when no folder matches the pattern
# (1:length(folders) would iterate over c(1, 0) and fail inside setwd()).
for (i in seq_along(folders)) {
  setwd(folders[i])
  if (length(dir()) == 5) {
    cat("\n ################## \n")
    print(i)

    # component 1: pre CAM data
    setwd(dir()[1])
    tmp <- jsonlite::fromJSON(txt = "data.txt")
    tmp_id <- tmp$ID[!is.na(tmp$ID)]
    cat("01:\n")
    print(tmp_id)
    print(tmp$sender[!is.na(tmp$sender)])
    setwd("..")

    # component 2: CAM data t1
    setwd(dir()[2])
    tmp2 <- jsonlite::fromJSON(txt = "data.txt")
    cat("02:\n")
    print(tmp2$projectCAM)
    print(tmp2$creator)
    # length() of the nodes object (the number of columns if it is a data
    # frame - TODO confirm that this is the intended quantity)
    print(length(tmp2$nodes))
    setwd("..")

    # component 3: post CAM data
    setwd(dir()[3])
    tmp <- jsonlite::fromJSON(txt = "data.txt")
    tmp_id <- tmp$ID[!is.na(tmp$ID)]
    cat("03:\n")
    print(tmp_id)
    print(tmp$sender)
    setwd("..")

    # component 4: CAM data t2
    setwd(dir()[4])
    tmp4 <- jsonlite::fromJSON(txt = "data.txt")
    # header fixed: this section was labelled "02:" like the t1 CAM
    cat("04:\n")
    print(tmp4$projectCAM)
    print(tmp4$creator)
    print(length(tmp4$nodes))

    # number of node ids of the t2 CAM that do not occur in the t1 CAM
    tmp_MissingIds <- length(tmp4$nodes$id[!tmp4$nodes$id %in% tmp2$nodes$id])

    cat(
      "CAM t1:", sum(tmp2$nodes$isActive),
      "CAM t2:", sum(tmp4$nodes$isActive),
      "number of IDs not in CAM t1:", tmp_MissingIds, "\n"
    )

    setwd("..")

    # component 5: post CAM data second
    setwd(dir()[5])
    tmp <- jsonlite::fromJSON(txt = "data.txt")
    tmp_id <- tmp$ID[!is.na(tmp$ID)]
    cat("05:\n")
    print(tmp_id)
    print(tmp$sender[!is.na(tmp$sender)])
    setwd("..")

    cat("\n")
  }
  setwd("..")
}


 ################## 
[1] 2
01:
[1] 430435
[1] "Greetings"         "InformedConsent"   "InformedConsentNO"
[4] "ExclusionCriteria" "waitingPage"       "CAMsDetailedInst" 
02:
[1] "KeBNE_1a"
[1] 430435
[1] 15
03:
NULL
[1] "break500ms_1"              "CAMfeedbackGeneral"       
[3] "update CAM Instruction II" NA                         
02:
[1] "KeBNE_1b"
[1] 430435
[1] 15
CAM t1: 27 CAM t2: 27 number of IDs not in CAM t1: 0 
05:
NULL
 [1] "break500ms_2"                "FinalQuestionsScreen"       
 [3] "CCDisImpair scale I"         "CCDisImpair scale II"       
 [5] "SCALES CCDisImpair Sequence" "additional scale"           
 [7] "SCALES Sequence"             "socioDemographic"           
 [9] "FeedbackScreen"              "EndingScreen"               


 ################## 
[1] 4
01:
[1] 791384
[1] "Greetings"         "InformedConsent"   "InformedConsentNO"
[4] "ExclusionCriteria" "waitingPage"       "CAMsDetailedInst" 
02:
[1] "KeBNE_1a"
[1] 791384
[1] 15
03:
NULL
[1] "break500ms_1"              "CAMfeedbackGeneral"       
[3] "update CAM Instruction II" NA                         
02:
[1] "KeBNE_1b"
[1] 791384
[1] 15
CAM t1: 20 CAM t2: 17 number of IDs not in CAM t1: 7 
05:
NULL
 [1] "break500ms_2"                "FinalQuestionsScreen"       
 [3] "CCDisImpair scale I"         "CCDisImpair scale II"       
 [5] "SCALES CCDisImpair Sequence" "additional scale"           
 [7] "SCALES Sequence"             "socioDemographic"           
 [9] "FeedbackScreen"              "EndingScreen"               


 ################## 
[1] 7
01:
[1] 187700
[1] "Greetings"         "InformedConsent"   "InformedConsentNO"
[4] "ExclusionCriteria" "waitingPage"       "CAMsDetailedInst" 
02:
[1] "KeBNE_1a"
[1] 187700
[1] 15
03:
NULL
[1] "break500ms_1"              "CAMfeedbackGeneral"       
[3] "update CAM Instruction II" NA                         
02:
[1] "KeBNE_1b"
[1] 187700
[1] 15
CAM t1: 15 CAM t2: 17 number of IDs not in CAM t1: 3 
05:
NULL
 [1] "break500ms_2"                "FinalQuestionsScreen"       
 [3] "additional scale"            "CCDisImpair scale I"        
 [5] "CCDisImpair scale II"        "SCALES CCDisImpair Sequence"
 [7] "SCALES Sequence"             "socioDemographic"           
 [9] "FeedbackScreen"              "EndingScreen"               


 ################## 
[1] 9
01:
[1] 954565
[1] "Greetings"         "InformedConsent"   "InformedConsentNO"
[4] "ExclusionCriteria" "waitingPage"       "CAMsDetailedInst" 
02:
NULL
NULL
[1] 0
03:
NULL
NULL
02:
NULL
NULL
[1] 0
CAM t1: 0 CAM t2: 0 number of IDs not in CAM t1: 0 
05:
NULL
NULL


 ################## 
[1] 12
01:
[1] 872639
[1] "Greetings"         "InformedConsent"   "InformedConsentNO"
[4] "ExclusionCriteria" "waitingPage"       "CAMsDetailedInst" 
02:
[1] "KeBNE_1a"
[1] 872639
[1] 15
03:
NULL
[1] "break500ms_1"              "CAMfeedbackGeneral"       
[3] "update CAM Instruction II" NA                         
02:
[1] "KeBNE_1b"
[1] 872639
[1] 15
CAM t1: 13 CAM t2: 15 number of IDs not in CAM t1: 6 
05:
NULL
 [1] "break500ms_2"                "FinalQuestionsScreen"       
 [3] "additional scale"            "CCDisImpair scale I"        
 [5] "CCDisImpair scale II"        "SCALES CCDisImpair Sequence"
 [7] "SCALES Sequence"             "socioDemographic"           
 [9] "FeedbackScreen"              "EndingScreen"               


 ################## 
[1] 13
01:
[1] 857746
[1] "Greetings"         "InformedConsent"   "InformedConsentNO"
[4] "ExclusionCriteria" "waitingPage"       "CAMsDetailedInst" 
02:
[1] "KeBNE_1a"
[1] 857746
[1] 15
03:
NULL
[1] "break500ms_1"              "CAMfeedbackGeneral"       
[3] "update CAM Instruction II" NA                         
02:
[1] "KeBNE_1b"
[1] 857746
[1] 15
CAM t1: 19 CAM t2: 19 number of IDs not in CAM t1: 0 
05:
NULL
 [1] "break500ms_2"                "FinalQuestionsScreen"       
 [3] "CCDisImpair scale I"         "CCDisImpair scale II"       
 [5] "SCALES CCDisImpair Sequence" "additional scale"           
 [7] "SCALES Sequence"             "socioDemographic"           
 [9] "FeedbackScreen"              "EndingScreen"               


 ################## 
[1] 14
01:
[1] 195686
[1] "Greetings"         "InformedConsent"   "InformedConsentNO"
[4] "ExclusionCriteria" "waitingPage"       "CAMsDetailedInst" 
02:
[1] "KeBNE_1a"
[1] 195686
[1] 15
03:
NULL
[1] "break500ms_1"              "CAMfeedbackGeneral"       
[3] "update CAM Instruction II" NA                         
02:
[1] "KeBNE_1b"
[1] 195686
[1] 15
CAM t1: 13 CAM t2: 14 number of IDs not in CAM t1: 1 
05:
NULL
[1] "break500ms_2"                "FinalQuestionsScreen"       
[3] "additional scale"            "CCDisImpair scale I"        
[5] "CCDisImpair scale II"        "SCALES CCDisImpair Sequence"
[7] "SCALES Sequence"             "socioDemographic"           
[9] "FeedbackScreen"             


 ################## 
[1] 18
01:
[1] 81671
[1] "Greetings"         "InformedConsent"   "InformedConsentNO"
[4] "ExclusionCriteria" "waitingPage"       "CAMsDetailedInst" 
02:
[1] "KeBNE_1a"
[1] 81671
[1] 15
03:
NULL
[1] "break500ms_1"              "CAMfeedbackGeneral"       
[3] "update CAM Instruction II" NA                         
02:
[1] "KeBNE_1b"
[1] 81671
[1] 15
CAM t1: 15 CAM t2: 16 number of IDs not in CAM t1: 12 
05:
NULL
 [1] "break500ms_2"                "FinalQuestionsScreen"       
 [3] "CCDisImpair scale I"         "CCDisImpair scale II"       
 [5] "SCALES CCDisImpair Sequence" "additional scale"           
 [7] "SCALES Sequence"             "socioDemographic"           
 [9] "FeedbackScreen"              "EndingScreen"               


 ################## 
[1] 19
01:
[1] 109965
[1] "Greetings"         "InformedConsent"   "InformedConsentNO"
[4] "ExclusionCriteria" "waitingPage"       "CAMsDetailedInst" 
02:
[1] "KeBNE_1a"
[1] 109965
[1] 15
03:
NULL
[1] "break500ms_1"              "CAMfeedbackGeneral"       
[3] "update CAM Instruction II" NA                         
02:
[1] "KeBNE_1b"
[1] 109965
[1] 15
CAM t1: 12 CAM t2: 15 number of IDs not in CAM t1: 6 
05:
NULL
[1] "break500ms_2"                "FinalQuestionsScreen"       
[3] "additional scale"            "CCDisImpair scale I"        
[5] "CCDisImpair scale II"        "SCALES CCDisImpair Sequence"
[7] "SCALES Sequence"             "socioDemographic"           
[9] "FeedbackScreen"             


 ################## 
[1] 21
01:
[1] 766788
[1] "Greetings"         "InformedConsent"   "InformedConsentNO"
[4] "ExclusionCriteria" "waitingPage"       "CAMsDetailedInst" 
02:
[1] "KeBNE_1a"
[1] 766788
[1] 15
03:
NULL
[1] "break500ms_1"              "CAMfeedbackGeneral"       
[3] "update CAM Instruction II" NA                         
02:
[1] "KeBNE_1b"
[1] 766788
[1] 15
CAM t1: 20 CAM t2: 20 number of IDs not in CAM t1: 0 
05:
NULL
 [1] "break500ms_2"                "FinalQuestionsScreen"       
 [3] "additional scale"            "CCDisImpair scale I"        
 [5] "CCDisImpair scale II"        "SCALES CCDisImpair Sequence"
 [7] "SCALES Sequence"             "socioDemographic"           
 [9] "FeedbackScreen"              "EndingScreen"               


 ################## 
[1] 22
01:
[1] 18048
[1] "Greetings"         "InformedConsent"   "InformedConsentNO"
[4] "ExclusionCriteria" "waitingPage"       "CAMsDetailedInst" 
02:
[1] "KeBNE_1a"
[1] 18048
[1] 15
03:
NULL
[1] "break500ms_1"              "CAMfeedbackGeneral"       
[3] "update CAM Instruction II" NA                         
02:
[1] "KeBNE_1b"
[1] 18048
[1] 15
CAM t1: 16 CAM t2: 16 number of IDs not in CAM t1: 2 
05:
NULL
 [1] "break500ms_2"                "FinalQuestionsScreen"       
 [3] "CCDisImpair scale I"         "CCDisImpair scale II"       
 [5] "SCALES CCDisImpair Sequence" "additional scale"           
 [7] "SCALES Sequence"             "socioDemographic"           
 [9] "FeedbackScreen"              "EndingScreen"               


 ################## 
[1] 27
01:
[1] 715177
[1] "Greetings"         "InformedConsent"   "InformedConsentNO"
[4] "ExclusionCriteria" "waitingPage"       "CAMsDetailedInst" 
02:
[1] "KeBNE_1a"
[1] 715177
[1] 15
03:
NULL
[1] "break500ms_1"              "CAMfeedbackGeneral"       
[3] "update CAM Instruction II" NA                         
02:
[1] "KeBNE_1b"
[1] 715177
[1] 15
CAM t1: 26 CAM t2: 26 number of IDs not in CAM t1: 0 
05:
NULL
 [1] "break500ms_2"                "FinalQuestionsScreen"       
 [3] "additional scale"            "CCDisImpair scale I"        
 [5] "CCDisImpair scale II"        "SCALES CCDisImpair Sequence"
 [7] "SCALES Sequence"             "socioDemographic"           
 [9] "FeedbackScreen"              "EndingScreen"

# drop the temporary objects created by the raw-data check
rm(tmp, tmp2, tmp4, tmp_MissingIds)

create raw data files

### list data files
setwd("data")
folders <- list.files(pattern = "^study_result.*")

### create data files - GERMANY
# One line-delimited JSON file is written per study component. Each file is
# first created (containing a single empty line) and then re-opened in append
# mode, so that one JSON line per participant can be added inside the loop.
# The connections stay open after this block and must be closed afterwards.
# get CAM data
writeLines("", "CAM_t1.txt") # create file
text_connection <-
  file("CAM_t1.txt", "a") # open connection to append

# get CAM data second
writeLines("", "CAM_t2.txt") # create file
text_connection_second <-
  file("CAM_t2.txt", "a") # open connection to append

# get pre CAM data
writeLines("", "preCAM.txt") # create file
text_connection_pre <-
  file("preCAM.txt", "a") # open connection to append

# get post CAM data
writeLines("", "postCAM.txt") # create file
text_connection_post <-
  file("postCAM.txt", "a") # open connection to append

# get post second CAM data
writeLines("", "secondPostCAM.txt") # create file
text_connection_postSecond <-
  file("secondPostCAM.txt", "a") # open connection to append


# Only study results with exactly five components are considered; the
# components are addressed by their position in dir().
# seq_along() avoids iterating over c(1, 0) when no folder was found.
for (i in seq_along(folders)) {
  setwd(folders[i])
  if (length(dir()) == 5) {
    # print(i)

    # pre CAM data: source of the participant ID used as key below
    setwd(dir()[1])
    tmp1 <- jsonlite::fromJSON(txt = "data.txt")
    tmp_id <- tmp1$ID[!is.na(tmp1$ID)]
    setwd("..")


    # CAM data
    setwd(dir()[2])
    tmp2 <- jsonlite::fromJSON(txt = "data.txt")
    # isTRUE() treats everything except one unambiguous match as a mismatch;
    # a plain `!=` yields a zero-length or longer result when tmp_id is empty
    # or has several entries, which breaks the scalar `||` / `if` condition
    if (!("creator" %in% names(tmp2)) || !isTRUE(tmp2$creator == tmp_id)) {
      warning("IDs (primary keys) not matching or order of datasets is distorted - 1")
      tmp2 <- NULL
    }
    setwd("..")



    # post CAM data: add the participant ID as second entry of a new ID column
    setwd(dir()[3])
    tmp3 <- jsonlite::fromJSON(txt = "data.txt")
    tmp3$ID <- NA
    tmp3$ID[2] <- tmp_id
    setwd("..")

    # CAM data second
    setwd(dir()[4])
    tmp4 <- jsonlite::fromJSON(txt = "data.txt")
    if (!("creator" %in% names(tmp4)) || !isTRUE(tmp4$creator == tmp_id)) {
      # suffix "- 2" distinguishes this check from the t1 check above
      warning("IDs (primary keys) not matching or order of datasets is distorted - 2")
      tmp4 <- NULL
    }
    # tmp$creator <- paste0(tmp$creator, "_t2")
    setwd("..")

    # post CAM data second: same ID handling as for the first post CAM data
    setwd(dir()[5])
    tmp5 <- jsonlite::fromJSON(txt = "data.txt")
    tmp5$ID <- NA
    tmp5$ID[2] <- tmp_id
    setwd("..")


    # write a participant only if all five components are available, so that
    # the five output files stay aligned line by line
    if (!is.null(tmp1) &&
        !is.null(tmp2) &&
        !is.null(tmp3) && !is.null(tmp4) && !is.null(tmp5)) {
      writeLines(jsonlite::toJSON(x = tmp1), text_connection_pre)
      writeLines(jsonlite::toJSON(x = tmp2), text_connection)
      writeLines(jsonlite::toJSON(x = tmp3), text_connection_post)
      writeLines(jsonlite::toJSON(x = tmp4), text_connection_second)
      writeLines(jsonlite::toJSON(x = tmp5), text_connection_postSecond)
    }
  }
  setwd("..")
}

Warning: IDs (primary keys) not matching or order of datasets is distorted - 1
Warning: IDs (primary keys) not matching or order of datasets is distorted - 1

# close all five append connections
# (CAM t1, pre CAM, post CAM, CAM t2, second post CAM - in this order)
invisible(lapply(
  list(
    text_connection,
    text_connection_pre,
    text_connection_post,
    text_connection_second,
    text_connection_postSecond
  ),
  close
))

### move files to output folder
# copy files (not overwritten)
# file.copy() runs with its default overwrite = FALSE: a file that already
# exists in the output folder is kept and the call returns FALSE.
tmp_file_from <- getwd()
setwd("../outputs/data_raw")
file.copy(
  from = file.path(tmp_file_from, "CAM_t1.txt"),
  to = file.path(getwd(), "CAM_t1.txt")
)

[1] FALSE

# copy the pre CAM file (an existing target is not overwritten)
file.copy(
  from = file.path(tmp_file_from, "preCAM.txt"),
  to = file.path(getwd(), "preCAM.txt")
)

[1] FALSE

# copy the post CAM file (an existing target is not overwritten)
file.copy(
  from = file.path(tmp_file_from, "postCAM.txt"),
  to = file.path(getwd(), "postCAM.txt")
)

[1] FALSE

# copy the second post CAM file (an existing target is not overwritten)
file.copy(
  from = file.path(tmp_file_from, "secondPostCAM.txt"),
  to = file.path(getwd(), "secondPostCAM.txt")
)

[1] FALSE

# copy the t2 CAM file (an existing target is not overwritten)
file.copy(
  from = file.path(tmp_file_from, "CAM_t2.txt"),
  to = file.path(getwd(), "CAM_t2.txt")
)

[1] FALSE

# remove files
# the temporary file in the source folder is deleted irrespective of the
# result of the preceding file.copy() call
file.remove(file.path(tmp_file_from, "CAM_t1.txt"))

[1] TRUE

# delete the temporary pre CAM file from the source folder
file.remove(file.path(tmp_file_from, "preCAM.txt"))

[1] TRUE

# delete the temporary post CAM file from the source folder
file.remove(file.path(tmp_file_from, "postCAM.txt"))

[1] TRUE

# delete the temporary second post CAM file from the source folder
file.remove(file.path(tmp_file_from, "secondPostCAM.txt"))

[1] TRUE

# delete the temporary t2 CAM file from the source folder
file.remove(file.path(tmp_file_from, "CAM_t2.txt"))

[1] TRUE

### load functions
# Source every file found in the two helper folders; both paths are relative
# to the working directory that is active at this point.
# Iterating over the file names directly takes a single snapshot of the
# directory listing and does nothing for an empty folder
# (1:length(dir()) would iterate over c(1, 0) and call source(NA)).
# print(getwd())
setwd("../../../../functions")
for (i in dir()) {
  # print(i)
  source(i, encoding = "utf-8")
}


setwd("../functions_CAMapp")
for (i in dir()) {
  # print(i)
  source(i, encoding = "utf-8")
}
rm(i)


# remove the temporary objects that were used to create the raw data files
rm(
  tmp1, tmp2, tmp3, tmp4, tmp5,
  tmp_id,
  folders,
  tmp_file_from,
  text_connection,
  text_connection_post,
  text_connection_postSecond,
  text_connection_pre,
  text_connection_second
)

merge CAM data sets

setwd("outputs/data_raw")
# Concatenate the lines of the t1 and the t2 CAM file (t1 first) and write
# them to one combined file
writeLines(
  c(readLines("CAM_t1.txt"), readLines("CAM_t2.txt")),
  "CAM_t1t2.txt"
)

set up data.frame questionnaires

load data:

setwd("outputs/data_raw")
# Every file holds one JSON document per line. The same steps are applied to
# each file: read the whole file, split it into lines, drop lines that are
# empty or consist only of a carriage return, parse each remaining line and
# row-bind the results into one data frame.

# > pre study
dat_preCAM <- suppressMessages(
  read_file('preCAM.txt') %>%
    str_split('\n') %>%
    first() %>%
    discard(function(line) line %in% c('', '\r')) %>%
    map_dfr(fromJSON, flatten = TRUE)
)

# > post first CAM
dat_postCAM <- suppressMessages(
  read_file('postCAM.txt') %>%
    str_split('\n') %>%
    first() %>%
    discard(function(line) line %in% c('', '\r')) %>%
    map_dfr(fromJSON, flatten = TRUE)
)

# > post second CAM
dat_secondPostCAM <- suppressMessages(
  read_file('secondPostCAM.txt') %>%
    str_split('\n') %>%
    first() %>%
    discard(function(line) line %in% c('', '\r')) %>%
    map_dfr(fromJSON, flatten = TRUE)
)

rename ID variable for data sets:

# the participant key "ID" becomes "uniqueID" in all three data sets, which
# frees the name "ID" for another column
names(dat_preCAM)[names(dat_preCAM) == "ID"] <- "uniqueID"
names(dat_postCAM)[names(dat_postCAM) == "ID"] <- "uniqueID"
names(dat_secondPostCAM)[names(dat_secondPostCAM) == "ID"] <- "uniqueID"

add unique ID variable to match rows to participants, only keep complete data sets:

### create counter variable for all three data sets
# The rows of one participant start with a fixed first screen ("sender").
# Counting these start markers cumulatively gives every row a running
# participant number; rows before the first marker get 0 and rows with a
# missing sender never count as a marker (%in% returns FALSE for NA).
# The vectorised form replaces the 1:nrow() loops, which fail on an empty
# data set. as.numeric() keeps the double type produced by the former
# loop-based counter.

# pre study
dat_preCAM$ID <- as.numeric(cumsum(dat_preCAM$sender %in% "Greetings"))

# post study
dat_postCAM$ID <- as.numeric(cumsum(dat_postCAM$sender %in% "break500ms_1"))

# second post study
dat_secondPostCAM$ID <-
  as.numeric(cumsum(dat_secondPostCAM$sender %in% "break500ms_2"))



### keep only complete data sets
# pre-study
# sort(table(dat_preCAM$ID))
sum(table(dat_preCAM$ID) != max(table(dat_preCAM$ID)))

[1] 0

sum(table(dat_preCAM$ID) == max(table(dat_preCAM$ID)))

[1] 11

# keep only participants whose number of rows equals the maximum row count
dat_preCAM <- local({
  rows_per_id <- table(dat_preCAM$ID)
  complete_ids <- names(rows_per_id)[rows_per_id == max(rows_per_id)]
  dat_preCAM[dat_preCAM$ID %in% complete_ids, ]
})

# post-study
# sort(table(dat_postCAM$ID))
sum(table(dat_postCAM$ID) != max(table(dat_postCAM$ID)))

[1] 0

sum(table(dat_postCAM$ID) == max(table(dat_postCAM$ID)))

[1] 11

# keep only participants whose number of rows equals the maximum row count
dat_postCAM <- local({
  rows_per_id <- table(dat_postCAM$ID)
  complete_ids <- names(rows_per_id)[rows_per_id == max(rows_per_id)]
  dat_postCAM[dat_postCAM$ID %in% complete_ids, ]
})

# post-study second
# sort(table(dat_secondPostCAM$ID))
sum(table(dat_secondPostCAM$ID) != max(table(dat_secondPostCAM$ID)))

[1] 2

sum(table(dat_secondPostCAM$ID) == max(table(dat_secondPostCAM$ID)))

[1] 9

# keep only participants whose number of rows equals the maximum row count
dat_secondPostCAM <- local({
  rows_per_id <- table(dat_secondPostCAM$ID)
  complete_ids <- names(rows_per_id)[rows_per_id == max(rows_per_id)]
  dat_secondPostCAM[dat_secondPostCAM$ID %in% complete_ids, ]
})


all(unique(dat_preCAM$ID) %in% unique(dat_postCAM$ID))

[1] TRUE

all(unique(dat_preCAM$ID) %in% unique(dat_secondPostCAM$ID))

[1] FALSE

questionnaire pre-study (component 1)

colnames(dat_preCAM)

 [1] "sender"                  "sender_type"            
 [3] "sender_id"               "6"                      
 [5] "ended_on"                "duration"               
 [7] "time_run"                "time_render"            
 [9] "time_show"               "time_end"               
[11] "time_commit"             "timestamp"              
[13] "time_switch"             "currentLocation"        
[15] "IDtype"                  "uniqueID"               
[17] "dummy_informedconsent"   "commCheck"              
[19] "meta.labjs_version"      "meta.location"          
[21] "meta.userAgent"          "meta.platform"          
[23] "meta.language"           "meta.locale"            
[25] "meta.timeZone"           "meta.timezoneOffset"    
[27] "meta.screen_width"       "meta.screen_height"     
[29] "meta.scroll_width"       "meta.scroll_height"     
[31] "meta.window_innerWidth"  "meta.window_innerHeight"
[33] "meta.devicePixelRatio"   "meta.labjs_build.flavor"
[35] "meta.labjs_build.commit" "para_defocuscount"      
[37] "7"                       "ID"

# columns starting with "meta" or "R", without those whose name contains
# "labjs" or "location"
tmp_notNumeric <- colnames(dat_preCAM) %>%
  str_subset(pattern = "^meta|^R") %>%
  str_subset(pattern = "labjs|location", negate = TRUE)

# variables passed to questionnairetype() as notNumeric
vec_notNumeric <- c("uniqueID", "currentLocation", "IDtype", tmp_notNumeric)

# variables to extract from the pre-study data
vec_ques <- c(
  "uniqueID",
  "currentLocation", "IDtype",
  "dummy_informedconsent",
  "commCheck",
  tmp_notNumeric
)

questionnaire_c1 <- questionnairetype(
  dataset = dat_preCAM,
  listvars = vec_ques,
  notNumeric = vec_notNumeric,
  verbose = FALSE
)


dim(questionnaire_c1)

[1] 11 19

colnames(questionnaire_c1)

 [1] "ID"                      "uniqueID"               
 [3] "currentLocation"         "IDtype"                 
 [5] "dummy_informedconsent"   "commCheck"              
 [7] "meta.userAgent"          "meta.platform"          
 [9] "meta.language"           "meta.locale"            
[11] "meta.timeZone"           "meta.timezoneOffset"    
[13] "meta.screen_width"       "meta.screen_height"     
[15] "meta.scroll_width"       "meta.scroll_height"     
[17] "meta.window_innerWidth"  "meta.window_innerHeight"
[19] "meta.devicePixelRatio"

questionnaire post-CAM (component 3)

colnames(dat_postCAM)

 [1] "sender"                     "sender_type"               
 [3] "sender_id"                  "ended_on"                  
 [5] "duration"                   "time_run"                  
 [7] "time_render"                "time_show"                 
 [9] "time_end"                   "time_commit"               
[11] "timestamp"                  "time_switch"               
[13] "feedCAM_repres"             "feedCAM_technicalprobs"    
[15] "feedCAM_technicalprobsText" "feedCAM_already"           
[17] "feedCAM_alreadyText"        "18"                        
[19] "uniqueID"                   "para_defocuscount"         
[21] "19"                         "ID"

# variables passed to questionnairetype() as notNumeric
vec_notNumeric <- c(
  "uniqueID",
  "feedCAM_technicalprobsText", "feedCAM_alreadyText"
)

# variables to extract from the post-CAM data
vec_ques <- c(
  "uniqueID",
  "feedCAM_repres",
  "feedCAM_technicalprobs", "feedCAM_technicalprobsText",
  "feedCAM_already", "feedCAM_alreadyText"
)

questionnaire_c3 <- questionnairetype(
  dataset = dat_postCAM,
  listvars = vec_ques,
  notNumeric = vec_notNumeric,
  verbose = FALSE
)

dim(questionnaire_c3)

[1] 11  7

colnames(questionnaire_c3)

[1] "ID"                         "uniqueID"                  
[3] "feedCAM_repres"             "feedCAM_technicalprobs"    
[5] "feedCAM_technicalprobsText" "feedCAM_already"           
[7] "feedCAM_alreadyText"

questionnaire post-second-CAM (component 5):

colnames(dat_secondPostCAM)

 [1] "sender"                   "sender_type"             
 [3] "sender_id"                "ended_on"                
 [5] "duration"                 "time_run"                
 [7] "time_render"              "time_show"               
 [9] "time_end"                 "time_commit"             
[11] "timestamp"                "time_switch"             
[13] "feedCAM_repres_2"         "ease_mindmap"            
[15] "15"                       "uniqueID"                
[17] "para_countclicks"         "CCDisImpair-anxiety3"    
[19] "CCDisImpair-sadness1r"    "CCDisImpair-anxiety5r"   
[21] "CCDisImpair-impairment5r" "CCDisImpair-anger2"      
[23] "CCDisImpair-sadness4"     "CCDisImpair-anger4r"     
[25] "CCDisImpair-impairment4"  "CCDisImpair-sadness2"    
[27] "CCDisImpair-sadness3"     "CCDisImpair-anxiety1"    
[29] "CCDisImpair-sadness5"     "CCDisImpair-impairment1" 
[31] "CCDisImpair-impairment2r" "CCDisImpair-anger1"      
[33] "CCDisImpair-anger3r"      "CCDisImpair-impairment3" 
[35] "CCDisImpair-anger5r"      "CCDisImpair-impairment7" 
[37] "CCDisImpair-impairment8"  "CCDisImpair-anxiety4"    
[39] "CCDisImpair-impairment6r" "CCDisImpair-anxiety2r"   
[41] "education-3"              "education-2"             
[43] "education-1"              "education-4"             
[45] "guttman-item"             "not_needed"              
[47] "guttman-response"         "sociodemo_age"           
[49] "sociodemo_gender"         "sociodemo_language"      
[51] "subjects_taught"          "school_types"            
[53] "feedback_critic"          "ID"

# variables passed to questionnairetype() as notNumeric
vec_notNumeric <- c(
  "uniqueID",
  "sociodemo_gender", "sociodemo_language",
  "subjects_taught", "school_types",
  "feedback_critic"
)

# variables to extract from the second post-CAM data: feedback items, all
# columns starting with "CCDisImpair" or "education", and the
# sociodemographic / free-text items
vec_ques <- c(
  "uniqueID",
  "feedCAM_repres_2", "ease_mindmap",
  str_subset(string = colnames(dat_secondPostCAM), pattern = "^CCDisImpair|^education"),
  "sociodemo_age", "sociodemo_gender", "sociodemo_language",
  "subjects_taught", "school_types",
  "feedback_critic"
)

questionnaire_c5 <- questionnairetype(
  dataset = dat_secondPostCAM,
  listvars = vec_ques,
  notNumeric = vec_notNumeric,
  verbose = FALSE
)

dim(questionnaire_c5)

[1]  9 37

colnames(questionnaire_c5)

 [1] "ID"                       "uniqueID"                
 [3] "feedCAM_repres_2"         "ease_mindmap"            
 [5] "CCDisImpair-anxiety3"     "CCDisImpair-sadness1r"   
 [7] "CCDisImpair-anxiety5r"    "CCDisImpair-impairment5r"
 [9] "CCDisImpair-anger2"       "CCDisImpair-sadness4"    
[11] "CCDisImpair-anger4r"      "CCDisImpair-impairment4" 
[13] "CCDisImpair-sadness2"     "CCDisImpair-sadness3"    
[15] "CCDisImpair-anxiety1"     "CCDisImpair-sadness5"    
[17] "CCDisImpair-impairment1"  "CCDisImpair-impairment2r"
[19] "CCDisImpair-anger1"       "CCDisImpair-anger3r"     
[21] "CCDisImpair-impairment3"  "CCDisImpair-anger5r"     
[23] "CCDisImpair-impairment7"  "CCDisImpair-impairment8" 
[25] "CCDisImpair-anxiety4"     "CCDisImpair-impairment6r"
[27] "CCDisImpair-anxiety2r"    "education-3"             
[29] "education-2"              "education-1"             
[31] "education-4"              "sociodemo_age"           
[33] "sociodemo_gender"         "sociodemo_language"      
[35] "subjects_taught"          "school_types"            
[37] "feedback_critic"

get ratings of law and guttman

# Names of all columns starting with "guttman" (sorted alphabetically)
guttman_cols <- sort(str_subset(string = colnames(dat_secondPostCAM), pattern = "^guttman"))

# Add the guttman columns (all NA) to the post-CAM data so that both data sets
# share these columns and can be stacked with rbind().
# seq_along() skips the loop if no guttman column exists.
for (i in seq_along(guttman_cols)) {
  dat_postCAM[[guttman_cols[i]]] <- NA
}

# Create an empty list to store processed data
result_list <- list()

dat_merged <- rbind(
  dat_postCAM[, c("ID", "uniqueID", "sender", guttman_cols)],
  dat_secondPostCAM[, c("ID", "uniqueID", "sender", guttman_cols)]
)
# Recode the literal string "NA" of answered items to "LNA" so that it cannot
# be confused with a missing value; %in% is FALSE for real missing responses.
dat_merged$`guttman-response`[
  !is.na(dat_merged$`guttman-item`) & dat_merged$`guttman-response` %in% "NA"
] <- "LNA"

# Loop over each participant counter
for (i in unique(dat_merged$ID)) {
  tmp <- dat_merged[dat_merged$ID == i, c("uniqueID", "sender", guttman_cols)]

  tmp <- tmp %>%
    # Fill uniqueID down and up within the participant if missing
    fill(uniqueID, .direction = "downup") %>%
    # Keep rows in which at least one guttman column is not NA
    filter(if_any(all_of(guttman_cols), ~ !is.na(.)))

  # Rows without a sender are labelled by their own guttman item. Both sides
  # use the same row mask, so the labels stay aligned even when the number of
  # rows without sender differs from the number of rows with an item.
  tmp$sender[is.na(tmp$sender)] <-
    paste0("GI_", tmp$`guttman-item`[is.na(tmp$sender)])

  # Append to result list
  result_list[[as.character(i)]] <- tmp
}

# Combine all into one dataframe
questionnaire_rating_long <- bind_rows(result_list)
questionnaire_rating_long$sender <- factor(questionnaire_rating_long$sender)


# wide data set for guttman ratings
colnames(questionnaire_rating_long)[colnames(questionnaire_rating_long) == "guttman-item"] <- "item"
colnames(questionnaire_rating_long)[colnames(questionnaire_rating_long) == "guttman-response"] <- "response"

# one row per uniqueID, one "response_<item>" column per item
questionnaire_rating_wide_guttman <- questionnaire_rating_long %>%
  pivot_wider(
    id_cols = uniqueID,
    names_from = item,
    values_from = response,
    names_glue = "response_{item}"
  )

merge all data sets

# Left join all questionnaire data sets onto the pre-study data via uniqueID,
# then drop the suffixed counter columns "ID.x" / "ID.y" if they exist.
# A column named plain "ID" is not touched by this step.
merged_data <- questionnaire_c1 %>%
  left_join(questionnaire_c3, by = "uniqueID") %>%
  left_join(questionnaire_c5, by = "uniqueID") %>%
  left_join(questionnaire_rating_wide_guttman, by = "uniqueID") %>%
  select(-any_of(c("ID.x", "ID.y")))

dim(merged_data)

[1] 11 63

colnames(merged_data)

 [1] "uniqueID"                   "currentLocation"           
 [3] "IDtype"                     "dummy_informedconsent"     
 [5] "commCheck"                  "meta.userAgent"            
 [7] "meta.platform"              "meta.language"             
 [9] "meta.locale"                "meta.timeZone"             
[11] "meta.timezoneOffset"        "meta.screen_width"         
[13] "meta.screen_height"         "meta.scroll_width"         
[15] "meta.scroll_height"         "meta.window_innerWidth"    
[17] "meta.window_innerHeight"    "meta.devicePixelRatio"     
[19] "feedCAM_repres"             "feedCAM_technicalprobs"    
[21] "feedCAM_technicalprobsText" "feedCAM_already"           
[23] "feedCAM_alreadyText"        "ID"                        
[25] "feedCAM_repres_2"           "ease_mindmap"              
[27] "CCDisImpair-anxiety3"       "CCDisImpair-sadness1r"     
[29] "CCDisImpair-anxiety5r"      "CCDisImpair-impairment5r"  
[31] "CCDisImpair-anger2"         "CCDisImpair-sadness4"      
[33] "CCDisImpair-anger4r"        "CCDisImpair-impairment4"   
[35] "CCDisImpair-sadness2"       "CCDisImpair-sadness3"      
[37] "CCDisImpair-anxiety1"       "CCDisImpair-sadness5"      
[39] "CCDisImpair-impairment1"    "CCDisImpair-impairment2r"  
[41] "CCDisImpair-anger1"         "CCDisImpair-anger3r"       
[43] "CCDisImpair-impairment3"    "CCDisImpair-anger5r"       
[45] "CCDisImpair-impairment7"    "CCDisImpair-impairment8"   
[47] "CCDisImpair-anxiety4"       "CCDisImpair-impairment6r"  
[49] "CCDisImpair-anxiety2r"      "education-3"               
[51] "education-2"                "education-1"               
[53] "education-4"                "sociodemo_age"             
[55] "sociodemo_gender"           "sociodemo_language"        
[57] "subjects_taught"            "school_types"              
[59] "feedback_critic"            "response_3"                
[61] "response_1"                 "response_4"                
[63] "response_2"

save all data sets

setwd("outputs/questionnaire")

# Every data set is stored twice: as .RData object and as Excel file

# pre-study questionnaire
save(questionnaire_c1, file = "questionnaire_c1.RData")
write_xlsx(questionnaire_c1, "questionnaire_c1.xlsx")

# post-CAM questionnaire
save(questionnaire_c3, file = "questionnaire_c3.RData")
write_xlsx(questionnaire_c3, "questionnaire_c3.xlsx")

# second post-CAM questionnaire
save(questionnaire_c5, file = "questionnaire_c5.RData")
write_xlsx(questionnaire_c5, "questionnaire_c5.xlsx")

# merged questionnaire data
save(merged_data, file = "merged_data.RData")
write_xlsx(merged_data, "merged_data.xlsx")

# guttman ratings in long format
save(questionnaire_rating_long, file = "questionnaire_rating_long.RData")
write_xlsx(questionnaire_rating_long, "questionnaire_rating_long.xlsx")

set up CAM data

pre CAM (component 2)

Load CAM data

# Read the raw pre CAM export; every non-empty line is parsed as one JSON
# document. The path is built explicitly instead of calling setwd().
path_CAM_pre <- file.path("outputs", "data_raw", "CAM_t1.txt")
dat_CAM_pre <- suppressMessages(
  read_file(path_CAM_pre) %>%
    # ... split it into lines ...
    str_split("\n") %>%
    first() %>%
    # ... filter empty rows (including a lone carriage return) ...
    discard(function(x) x %in% c("", "\r"))
)

# lapply() also copes with an empty file, where 1:length() would iterate
# over c(1, 0) and fail.
raw_CAM_pre <- lapply(dat_CAM_pre, function(x) jsonlite::fromJSON(txt = x))

Create CAM files, draw CAMs and compute network indicators

### create CAM single files (nodes, connectors, merged)
### element [[1]] is later used as the nodes table and [[3]] as the merged table;
### with reDeleted = TRUE the function reports that nodes and connectors
### deleted by participants were removed
CAMfiles_pre <- create_CAMfiles(datCAM = raw_CAM_pre, reDeleted = TRUE)

Nodes and connectors, which were deleted by participants were removed. 
 # deleted nodes:  12 
 # deleted connectors:  13

# number of concept (node) rows before any testing data sets are removed
nrow(CAMfiles_pre[[1]])

[1] 196

# CAMfiles_pre[[1]] <- CAMfiles_pre[[1]][nchar(CAMfiles_pre[[1]]$participantCAM) == 24,]
# CAMfiles_pre[[2]] <- CAMfiles_pre[[2]][nchar(CAMfiles_pre[[2]]$participantCAM) == 24,]
# CAMfiles_pre[[3]] <- CAMfiles_pre[[3]][nchar(CAMfiles_pre[[3]]$participantCAM.x) == 24,]
# number of concept (node) rows after the (currently disabled) filter above;
# note: this counts rows of the nodes table, not the number of CAMs
nrow(CAMfiles_pre[[1]])

[1] 196

# remove person who drew many empty concepts
# tmp_pid <- unique(CAMfiles_pre[[1]]$participantCAM[CAMfiles_pre[[1]]$CAM %in% c("a0c6edeb-267a-4f27-8199-79f896e033ce", "8d74f576-e617-4eb1-8ccf-93589ce6c65b")])
# print(tmp_pid)

## remove person from questionnaire data
# questionnaire <- questionnaire[!questionnaire$PROLIFIC_PID %in% tmp_pid,]

## remove person from CAM data
# table(CAMfiles_pre[[1]][CAMfiles_pre[[1]]$participantCAM %in% tmp_pid,]$text)
# CAMfiles_pre[[1]] <- CAMfiles_pre[[1]][!CAMfiles_pre[[1]]$participantCAM %in% tmp_pid,]
# CAMfiles_pre[[2]] <- CAMfiles_pre[[2]][!CAMfiles_pre[[2]]$participantCAM %in% tmp_pid,]
# CAMfiles_pre[[3]] <- CAMfiles_pre[[3]][!CAMfiles_pre[[3]]$participantCAM.x %in% tmp_pid,]


# remove 7 empty concepts:
# CAMfiles_pre[[1]]$text[nchar(CAMfiles_pre[[1]]$text) < 2]
# tmp_ids <- CAMfiles_pre[[1]]$id[nchar(CAMfiles_pre[[1]]$text) < 2]
# table(CAMfiles_pre[[1]]$isActive[CAMfiles_pre[[1]]$id %in% tmp_ids])
# table(CAMfiles_pre[[1]]$participantCAM[CAMfiles_pre[[1]]$id %in% tmp_ids])
# 
# CAMfiles_pre[[1]] <- CAMfiles_pre[[1]][!CAMfiles_pre[[1]]$id %in% tmp_ids,]




## harmonise spelling variants of two concept labels in the nodes table
## NOTE(review): only CAMfiles_pre[[1]] is corrected; confirm that the merged
## table CAMfiles_pre[[3]] does not need the same correction
spelling_fixes <- c(
  "Wissenschaft- Interesse" = "Wissenschaft-Interesse",
  "BNE fächerbezogen" = "BNE-fächerbezogen"
)
for (variant in names(spelling_fixes)) {
  CAMfiles_pre[[1]]$text[CAMfiles_pre[[1]]$text == variant] <-
    spelling_fixes[[variant]]
}

### draw CAMs
CAMdrawn_pre <- draw_CAM(
  dat_merged = CAMfiles_pre[[3]],
  dat_nodes = CAMfiles_pre[[1]],
  ids_CAMs = "all",
  plot_CAM = FALSE,
  useCoordinates = TRUE,
  relvertexsize = 3,
  reledgesize = 1
)

processing 11 CAMs...

Warning: `graph.data.frame()` was deprecated in igraph 2.0.0.
ℹ Please use `graph_from_data_frame()` instead.

Warning: `as.undirected()` was deprecated in igraph 2.1.0.
ℹ Please use `as_undirected()` instead.

[1] "== participantCAM in drawnCAM"

# Print all vertex labels of every drawn CAM that contains at least one label
# shorter than three characters. seq_along() keeps the loop from running on
# the indices c(1, 0) when no CAM was drawn.
for (i in seq_along(CAMdrawn_pre)) {
  labels_tmp <- V(CAMdrawn_pre[[i]])$label
  if (any(nchar(labels_tmp) < 3)) {
    print(labels_tmp)
  }
}

                                               
             
             
### network indicators
## concept labels for which the concept-level ("micro") indicators are requested
tmp_microIndicator <- c(
  "BNE", "BNE-fächerbezogen", "BNE Relevanz",
  "Empirische Fachdidaktik", "Wissenschaft-Interesse", "Verantwortung"
)
networkIndicators_pre <- compute_indicatorsCAM(
  drawn_CAM = CAMdrawn_pre,
  micro_degree = tmp_microIndicator,
  micro_valence = tmp_microIndicator,
  micro_centr_clo = tmp_microIndicator,
  micro_transitivity = tmp_microIndicator,
  largestClique = FALSE
)

Warning: `graph.density()` was deprecated in igraph 2.0.0.
ℹ Please use `edge_density()` instead.

Warning: The `types1` argument of `assortativity()` is deprecated as of igraph 1.6.0.
ℹ Please use the `values` argument instead.

Warning: `assortativity.degree()` was deprecated in igraph 2.0.0.
ℹ Please use `assortativity_degree()` instead.

### wordlist
## NOTE(review): useSummarized = TRUE is only passed for the pre CAMs; the
## post and combined wordlists rely on the default - confirm this is intended
CAMwordlist_pre <- create_wordlist(
  dat_nodes = CAMfiles_pre[[1]], dat_merged = CAMfiles_pre[[3]],
  useSummarized = TRUE,
  order = "frequency", splitByValence = FALSE,
  comments = TRUE,
  raterSubsetWords = NULL, rater = FALSE
)

[1] "create_wordlist - use raw words"
[1] 0
[1] 196
[1] "temporarily suffixes are added, because not all words have been summarized"
processing 11 CAMs... 
[1] "== participantCAM in drawnCAM"

# Confirm that no (nearly) empty words are left in the wordlist. If the check
# fails, list the offending words instead of staying silent, as the post and
# combined checks do.
if (all(nchar(CAMwordlist_pre$Words) > 2)) {
  print("sucessfully removed empty words")
} else {
  print(CAMwordlist_pre$Words[nchar(CAMwordlist_pre$Words) <= 2])
}

DT::datatable(CAMwordlist_pre, options = list(pageLength = 5))

save CAMs as .json files, and as .png (igraph)

# Export the pre CAMs: one .png per participant (igraph plot) and one .json
# per raw CAM with more than five nodes. Runs only if save_CAMs_as_pictures
# is TRUE. The setwd() calls are kept because save_graphic() receives a bare
# file name. Files already present in the target folders are deleted first.
if (save_CAMs_as_pictures) {
  setwd("outputs")

  setwd("savedCAMs_pre")
  setwd("png")
  ### remove all files if there are any
  if (length(list.files()) >= 1) {
    file.remove(list.files())
    cat('\n!
      all former .png files have been deleted')
  }

  ### if no participant ID was provided replace by randomly generated CAM ID

  # if(all(CAMfiles_pre[[3]]$participantCAM.x == "noID")){
  #   CAMfiles_pre[[3]]$participantCAM.x <- CAMfiles_pre[[3]]$CAM.x
  # }

  ### save as .png (igraph)
  ids_CAMs <- unique(CAMfiles_pre[[3]]$participantCAM.x)

  # seq_along() skips the loop when there are no IDs
  for (i in seq_along(ids_CAMs)) {
    save_graphic(filename = paste0("CAM", "_t1_", ids_CAMs[i])) #  paste0(ids_CAMs[i]))
    # look up the drawn CAM whose list name equals the participant ID
    CAM_igraph <-
      CAMdrawn_pre[[which(names(CAMdrawn_pre) == paste0(ids_CAMs[i]))]]
    plot(CAM_igraph, edge.arrow.size = .7,
         layout = layout_nicely, vertex.frame.color = "black", asp = .5,
         margin = -0.1, vertex.size = 10, vertex.label.cex = .9)
    dev.off()
  }

  setwd("../json")
  ### remove all files if there are any
  if (length(list.files()) >= 1) {
    file.remove(list.files())
    cat('\n!
      all former .json files have been deleted')
  }
  ### save as .json files (only CAMs with more than five nodes)
  for (i in seq_along(raw_CAM_pre)) {
    nodes_tmp <- raw_CAM_pre[[i]]$nodes
    if (!is_empty(nodes_tmp) && nrow(nodes_tmp) > 5) {
      write(toJSON(raw_CAM_pre[[i]], encoding = "UTF-8"),
            paste0(raw_CAM_pre[[i]]$creator, ".json"))
    }
  }
}

post CAM (component 4)

Load CAM data

# Read the raw post CAM export; every non-empty line is parsed as one JSON
# document. The path is built explicitly instead of calling setwd().
path_CAM_post <- file.path("outputs", "data_raw", "CAM_t2.txt")
dat_CAM_post <- suppressMessages(
  read_file(path_CAM_post) %>%
    # ... split it into lines ...
    str_split("\n") %>%
    first() %>%
    # ... filter empty rows (including a lone carriage return) ...
    discard(function(x) x %in% c("", "\r"))
)

# lapply() also copes with an empty file, where 1:length() would iterate
# over c(1, 0) and fail.
raw_CAM_post <- lapply(dat_CAM_post, function(x) jsonlite::fromJSON(txt = x))

Create CAM files, draw CAMs and compute network indicators

### create CAM single files (nodes, connectors, merged)
### element [[1]] is later used as the nodes table and [[3]] as the merged table;
### with reDeleted = TRUE the function reports that nodes and connectors
### deleted by participants were removed
CAMfiles_post <- create_CAMfiles(datCAM = raw_CAM_post, reDeleted = TRUE)

Nodes and connectors, which were deleted by participants were removed. 
 # deleted nodes:  9 
 # deleted connectors:  1

# number of concept (node) rows before any testing data sets are removed
nrow(CAMfiles_post[[1]])

[1] 202

# CAMfiles_post[[1]] <- CAMfiles_post[[1]][nchar(CAMfiles_post[[1]]$participantCAM) >= 24,]
# CAMfiles_post[[2]] <- CAMfiles_post[[2]][nchar(CAMfiles_post[[2]]$participantCAM) >= 24,]
# CAMfiles_post[[3]] <- CAMfiles_post[[3]][nchar(CAMfiles_post[[3]]$participantCAM.x) >= 24,]
# number of concept (node) rows after the (currently disabled) filter above;
# note: this counts rows of the nodes table, not the number of CAMs
nrow(CAMfiles_post[[1]])

[1] 202

# remove person who drew many empty concepts
# tmp_pid <- unique(CAMfiles_post[[1]]$participantCAM[CAMfiles_post[[1]]$CAM %in% c("a0c6edeb-267a-4f27-8199-79f896e033ce", "8d74f576-e617-4eb1-8ccf-93589ce6c65b")])
# print(tmp_pid)

## removed person already from questionnaire data

# ## remove person from CAM data
# table(CAMfiles_post[[1]][CAMfiles_post[[1]]$participantCAM %in% tmp_pid,]$text)
# CAMfiles_post[[1]] <- CAMfiles_post[[1]][!CAMfiles_post[[1]]$participantCAM %in% tmp_pid,]
# CAMfiles_post[[2]] <- CAMfiles_post[[2]][!CAMfiles_post[[2]]$participantCAM %in% tmp_pid,]
# CAMfiles_post[[3]] <- CAMfiles_post[[3]][!CAMfiles_post[[3]]$participantCAM.x %in% tmp_pid,]
# 
# 
# # removed 4 empty concepts:
# CAMfiles_post[[1]]$text[nchar(CAMfiles_post[[1]]$text) < 2  & CAMfiles_post[[1]]$text != "f"]
# tmp_ids <- CAMfiles_post[[1]]$id[nchar(CAMfiles_post[[1]]$text) < 2 & CAMfiles_post[[1]]$text != "f"]
# table(CAMfiles_post[[1]]$isActive[CAMfiles_post[[1]]$id %in% tmp_ids])
# table(CAMfiles_post[[1]]$participantCAM[CAMfiles_post[[1]]$id %in% tmp_ids])
# 
# 
# CAMfiles_post[[1]] <- CAMfiles_post[[1]][!CAMfiles_post[[1]]$id %in% tmp_ids,]

## harmonise spelling variants of two concept labels in the nodes table
## NOTE(review): only CAMfiles_post[[1]] is corrected; confirm that the merged
## table CAMfiles_post[[3]] does not need the same correction
spelling_fixes <- c(
  "Wissenschaft- Interesse" = "Wissenschaft-Interesse",
  "BNE fächerbezogen" = "BNE-fächerbezogen"
)
for (variant in names(spelling_fixes)) {
  CAMfiles_post[[1]]$text[CAMfiles_post[[1]]$text == variant] <-
    spelling_fixes[[variant]]
}

### draw CAMs
CAMdrawn_post <- draw_CAM(
  dat_merged = CAMfiles_post[[3]],
  dat_nodes = CAMfiles_post[[1]],
  ids_CAMs = "all",
  plot_CAM = FALSE,
  useCoordinates = TRUE,
  relvertexsize = 3,
  reledgesize = 1
)

processing 11 CAMs... 
[1] "== participantCAM in drawnCAM"

### network indicators
## concept labels for which the concept-level ("micro") indicators are requested
tmp_microIndicator <- c(
  "BNE", "BNE-fächerbezogen", "BNE Relevanz",
  "Empirische Fachdidaktik", "Wissenschaft-Interesse", "Verantwortung"
)
networkIndicators_post <- compute_indicatorsCAM(
  drawn_CAM = CAMdrawn_post,
  micro_degree = tmp_microIndicator,
  micro_valence = tmp_microIndicator,
  micro_centr_clo = tmp_microIndicator,
  micro_transitivity = tmp_microIndicator,
  largestClique = FALSE
)


# wordlist
## NOTE(review): unlike the pre wordlist, useSummarized is not passed here, so
## the function default applies - confirm this is intended
CAMwordlist_post <- create_wordlist(
  dat_nodes = CAMfiles_post[[1]], dat_merged = CAMfiles_post[[3]],
  order = "frequency", splitByValence = FALSE,
  comments = TRUE,
  raterSubsetWords = NULL, rater = FALSE
)

[1] "create_wordlist - use raw words"
[1] 0
[1] 202
[1] "temporarily suffixes are added, because not all words have been summarized"
processing 11 CAMs... 
[1] "== participantCAM in drawnCAM"

# Confirm that no (nearly) empty words are left in the wordlist; otherwise
# list every word that failed the check. The else branch uses <= 2 so that it
# is the exact complement of the "> 2" condition (two-character words were
# previously neither accepted nor reported).
if (all(nchar(CAMwordlist_post$Words) > 2)) {
  print("sucessfully removed empty words")
} else {
  print(CAMwordlist_post$Words[nchar(CAMwordlist_post$Words) <= 2])
}

[1] "sucessfully removed empty words"

DT::datatable(CAMwordlist_post, options = list(pageLength = 5))

save CAMs as .json files, and as .png (igraph)

# Export the post CAMs: one .png per participant (igraph plot) and one .json
# per raw CAM with more than five nodes. Runs only if save_CAMs_as_pictures
# is TRUE. The setwd() calls are kept because save_graphic() receives a bare
# file name. Files already present in the target folders are deleted first.
if (save_CAMs_as_pictures) {
  setwd("outputs")

  setwd("savedCAMs_post")
  setwd("png")
  ### remove all files if there are any
  if (length(list.files()) >= 1) {
    file.remove(list.files())
    cat('\n!
      all former .png files have been deleted')
  }

  ### if no participant ID was provided replace by randomly generated CAM ID
  if (all(CAMfiles_post[[3]]$participantCAM.x == "noID")) {
    CAMfiles_post[[3]]$participantCAM.x <- CAMfiles_post[[3]]$CAM.x
  }

  ### save as .png (igraph)
  ids_CAMs <- unique(CAMfiles_post[[3]]$participantCAM.x)

  # seq_along() skips the loop when there are no IDs
  for (i in seq_along(ids_CAMs)) {
    save_graphic(filename = paste0("CAM", "_t2_", ids_CAMs[i])) #  paste0(ids_CAMs[i]))
    # look up the drawn CAM whose list name equals the participant ID
    CAM_igraph <-
      CAMdrawn_post[[which(names(CAMdrawn_post) == paste0(ids_CAMs[i]))]]
    plot(CAM_igraph, edge.arrow.size = .7,
         layout = layout_nicely, vertex.frame.color = "black", asp = .5,
         margin = -0.1, vertex.size = 10, vertex.label.cex = .9)
    dev.off()
  }

  setwd("../json")
  ### remove all files if there are any
  if (length(list.files()) >= 1) {
    file.remove(list.files())
    cat('\n!
      all former .json files have been deleted')
  }
  ### save as .json files (only CAMs with more than five nodes)
  for (i in seq_along(raw_CAM_post)) {
    nodes_tmp <- raw_CAM_post[[i]]$nodes
    if (!is_empty(nodes_tmp) && nrow(nodes_tmp) > 5) {
      write(toJSON(raw_CAM_post[[i]], encoding = "UTF-8"),
            paste0(raw_CAM_post[[i]]$creator, ".json"))
    }
  }
}

merge CAM data

Load CAM data

# Read the combined (t1 + t2) CAM export; every non-empty line is parsed as
# one JSON document. The path is built explicitly instead of calling setwd().
path_CAM_combined <- file.path("outputs", "data_raw", "CAM_t1t2.txt")
dat_CAM_combined <- suppressMessages(
  read_file(path_CAM_combined) %>%
    # ... split it into lines ...
    str_split("\n") %>%
    first() %>%
    # ... filter empty rows (including a lone carriage return) ...
    discard(function(x) x %in% c("", "\r"))
)

# lapply() also copes with an empty file, where 1:length() would iterate
# over c(1, 0) and fail.
raw_CAM_combined <- lapply(dat_CAM_combined,
                           function(x) jsonlite::fromJSON(txt = x))

length(raw_CAM_combined)

[1] 22

Create CAM files, draw CAMs and compute network indicators

### create CAM single files (nodes, connectors, merged)
### element [[1]] is later used as the nodes table and [[3]] as the merged table;
### with reDeleted = TRUE the function reports that nodes and connectors
### deleted by participants were removed
CAMfiles_combined <- create_CAMfiles(datCAM = raw_CAM_combined, reDeleted = TRUE)

Nodes and connectors, which were deleted by participants were removed. 
 # deleted nodes:  21 
 # deleted connectors:  14

# number of concept (node) rows before any testing data sets are removed
nrow(CAMfiles_combined[[1]])

[1] 398

# CAMfiles_combined[[1]] <- CAMfiles_combined[[1]][nchar(CAMfiles_combined[[1]]$participantCAM) >= 24,]
# CAMfiles_combined[[2]] <- CAMfiles_combined[[2]][nchar(CAMfiles_combined[[2]]$participantCAM) >= 24,]
# CAMfiles_combined[[3]] <- CAMfiles_combined[[3]][nchar(CAMfiles_combined[[3]]$participantCAM.x) >= 24,]
nrow(CAMfiles_combined[[1]])

[1] 398

# remove person who drew many empty concepts
# tmp_pid <- unique(CAMfiles_combined[[1]]$participantCAM[CAMfiles_combined[[1]]$CAM %in% c("a0c6edeb-267a-4f27-8199-79f896e033ce", "8d74f576-e617-4eb1-8ccf-93589ce6c65b")])
# print(tmp_pid)

## removed person already from questionnaire data

## remove person from CAM data
# table(CAMfiles_combined[[1]][CAMfiles_combined[[1]]$participantCAM %in% tmp_pid,]$text)
# CAMfiles_combined[[1]] <- CAMfiles_combined[[1]][!CAMfiles_combined[[1]]$participantCAM %in% tmp_pid,]
# CAMfiles_combined[[2]] <- CAMfiles_combined[[2]][!CAMfiles_combined[[2]]$participantCAM %in% tmp_pid,]
# CAMfiles_combined[[3]] <- CAMfiles_combined[[3]][!CAMfiles_combined[[3]]$participantCAM.x %in% tmp_pid,]


length(unique(CAMfiles_combined[[1]]$CAM))

[1] 22

# remove empty concepts:
# CAMfiles_combined[[1]]$text[nchar(CAMfiles_combined[[1]]$text) < 2  & CAMfiles_combined[[1]]$text != "f"]
# tmp_ids <- CAMfiles_combined[[1]]$id[nchar(CAMfiles_combined[[1]]$text) < 2 & CAMfiles_combined[[1]]$text != "f"]
# table(CAMfiles_combined[[1]]$isActive[CAMfiles_combined[[1]]$id %in% tmp_ids])
# table(CAMfiles_combined[[1]]$participantCAM[CAMfiles_combined[[1]]$id %in% tmp_ids])
# 
# CAMfiles_combined[[1]] <- CAMfiles_combined[[1]][!CAMfiles_combined[[1]]$id %in% tmp_ids,]


### add protocol # 
# if(consider_Protocol){
#   setwd("outputs/01_dataPreperation")
# 
#   text <- readLines("protocol_after_word2vec.txt", warn = FALSE)
#   text <- readLines(textConnection(text, encoding="UTF-8"), encoding="UTF-8")
# 
#   if (testIfJson(file = text)) {
#     protocol <- rjson::fromJSON(file = "protocol_after_word2vec.txt")
#   
#     ## no CAM deleted  
#   # CAMfiles_combined[[1]] <- CAMfiles_combined[[1]][CAMfiles_combined[[1]]$CAM %in% protocol$currentCAMs,]
#   # CAMfiles_combined[[2]] <- CAMfiles_combined[[2]][CAMfiles_combined[[2]]$CAM %in% protocol$currentCAMs,]
#   # CAMfiles_combined[[3]] <- CAMfiles_combined[[3]][CAMfiles_combined[[3]]$CAM.x %in% protocol$currentCAMs,]
# 
# 
#   tmp_out <- overwriteTextNodes(protocolDat = protocol,
#                                 nodesDat = CAMfiles_combined[[1]])
#   CAMfiles_combined[[1]] <- tmp_out[[1]]
#   # tmp_out[[2]]
#   
#   } else{
#     print("Invalid protocol uploaded")
#   }
# }



# vec_CAMs <- c(); h = 1
# for(c in unique(CAMfiles_combined[[1]]$CAM)){
#   tmp <- CAMfiles_combined[[1]][CAMfiles_combined[[1]]$CAM %in% c,]
#   
#   if(!(any(c("Rettungsroboter", "sozialer Assistenzroboter") %in% tmp$text) & all(c("Vorteile", "Nachteile") %in% tmp$text))){
#     print(c)
#     print(tmp$text)
#     vec_CAMs[h] <- c
#     h = h + 1
#     # plot(CAMdrawn_combined[[c]])
#   }
# }

## fix manually
# single pre defined concepts were falsely written
## Soziale Assistenzroboter to sozialer Assistenzroboter
# CAMfiles_combined[[1]]$CAM[CAMfiles_combined[[1]]$participantCAM %in% "5debfbcc3a11682f0fae8b29" & CAMfiles_combined[[1]]$text == "Soziale Assistenzroboter"] 
# vec_CAMs <- vec_CAMs[!vec_CAMs %in% CAMfiles_combined[[1]]$CAM[CAMfiles_combined[[1]]$participantCAM %in% "5debfbcc3a11682f0fae8b29" & CAMfiles_combined[[1]]$text == "Soziale Assistenzroboter"]]
# 
# CAMfiles_combined[[1]]$text[CAMfiles_combined[[1]]$participantCAM %in% "5debfbcc3a11682f0fae8b29" & CAMfiles_combined[[1]]$text == "Soziale Assistenzroboter"] <- "Soziale Assistenzroboter"
# CAMfiles_combined[[1]]$text_summarized[CAMfiles_combined[[1]]$participantCAM %in% "5debfbcc3a11682f0fae8b29" & CAMfiles_combined[[1]]$text == "Soziale Assistenzroboter"] <- "Soziale Assistenzroboter_positive"
# 
# ## Roboter to sozialer Rettungsroboter
# CAMfiles_combined[[1]]$CAM[CAMfiles_combined[[1]]$participantCAM %in% "5ba00acff337030001de805d" & CAMfiles_combined[[1]]$text == "Roboter"]
# vec_CAMs <- vec_CAMs[!vec_CAMs %in% CAMfiles_combined[[1]]$CAM[CAMfiles_combined[[1]]$participantCAM %in% "5ba00acff337030001de805d" & CAMfiles_combined[[1]]$text == "Roboter"]]
# 
# CAMfiles_combined[[1]]$text[CAMfiles_combined[[1]]$participantCAM %in% "5ba00acff337030001de805d" & CAMfiles_combined[[1]]$text == "Roboter"] <- "Rettungsroboter"
# CAMfiles_combined[[1]]$text_summarized[CAMfiles_combined[[1]]$participantCAM %in% "5ba00acff337030001de805d" & CAMfiles_combined[[1]]$text == "Rettungsroboter"] <- "Rettungsroboter_neutral"              


## remove 5 persons because of technical issues
# vec_Pids <- unique(CAMfiles_combined[[1]]$participantCAM[CAMfiles_combined[[1]]$CAM %in% vec_CAMs])
# vec_Pids
# 
# # remove CAMs
# CAMfiles_combined[[1]] <- CAMfiles_combined[[1]][!CAMfiles_combined[[1]]$participantCAM %in% vec_Pids,]
# CAMfiles_combined[[2]] <- CAMfiles_combined[[2]][!CAMfiles_combined[[2]]$participantCAM %in% vec_Pids,]
# CAMfiles_combined[[3]] <- CAMfiles_combined[[3]][!CAMfiles_combined[[3]]$participantCAM.x %in% vec_Pids,]

# remove questionnaires
# questionnaire <- questionnaire[!questionnaire$PROLIFIC_PID %in% vec_Pids,]




# remove person "6560e6f734ae18bd18474cc9" -> only draw pre-defined concepts
# for(c in unique(CAMfiles_combined[[1]]$participantCAM)){
#   tmp <- CAMfiles_combined[[1]][CAMfiles_combined[[1]]$participantCAM %in% c,]
#   
#   if(sum(tmp$text %in% c("Vorteile", "Nachteile")) != 4){
#     print(c)
#     print(sum(tmp$text %in% c("Vorteile", "Nachteile")))
#     print(tmp$text)
#   }
# }

## removed person from questionnaire data
# questionnaire <- questionnaire[!questionnaire$PROLIFIC_PID %in% "6560e6f734ae18bd18474cc9",]
# 
# ## remove person from CAM data
# CAMfiles_combined[[1]] <- CAMfiles_combined[[1]][!CAMfiles_combined[[1]]$participantCAM %in% "6560e6f734ae18bd18474cc9",]
# CAMfiles_combined[[2]] <- CAMfiles_combined[[2]][!CAMfiles_combined[[2]]$participantCAM %in% "6560e6f734ae18bd18474cc9",]
# CAMfiles_combined[[3]] <- CAMfiles_combined[[3]][!CAMfiles_combined[[3]]$participantCAM.x %in% "6560e6f734ae18bd18474cc9",]



# remove person "65304e8a630196510c79f7df" -> draw multiple times concept "leer"
# for(c in unique(CAMfiles_combined[[1]]$CAM)){
#   tmp <-  CAMfiles_combined[[1]][CAMfiles_combined[[1]]$CAM == c,]
#   
#   if(any(table(tmp$text) >= 3)){
#     print(c)
#     print(sort(table(tmp$text)))
#   }
# }
# 
# # remove person who draw many empty concepts
# tmp_pid <- unique(CAMfiles_combined[[1]]$participantCAM[CAMfiles_combined[[1]]$CAM %in% c("503b3517-b003-48e5-b121-f48c9a64ecb6", "39e7d213-1276-4da8-99ea-5a13487874e7")])
# print(tmp_pid)
# 
# ## remove person from questionnaire data
# questionnaire <- questionnaire[!questionnaire$PROLIFIC_PID %in% tmp_pid,]
# 
# ## remove person from CAM data
# CAMfiles_combined[[1]] <- CAMfiles_combined[[1]][!CAMfiles_combined[[1]]$participantCAM %in% tmp_pid,]
# CAMfiles_combined[[2]] <- CAMfiles_combined[[2]][!CAMfiles_combined[[2]]$participantCAM %in% tmp_pid,]
# CAMfiles_combined[[3]] <- CAMfiles_combined[[3]][!CAMfiles_combined[[3]]$participantCAM.x %in% tmp_pid,]

## harmonise spelling variants of two concept labels in the nodes table
## NOTE(review): only CAMfiles_combined[[1]] is corrected; confirm that the
## merged table CAMfiles_combined[[3]] does not need the same correction
spelling_fixes <- c(
  "Wissenschaft- Interesse" = "Wissenschaft-Interesse",
  "BNE fächerbezogen" = "BNE-fächerbezogen"
)
for (variant in names(spelling_fixes)) {
  CAMfiles_combined[[1]]$text[CAMfiles_combined[[1]]$text == variant] <-
    spelling_fixes[[variant]]
}

### draw CAMs
CAMdrawn_combined <- draw_CAM(
  dat_merged = CAMfiles_combined[[3]],
  dat_nodes = CAMfiles_combined[[1]],
  ids_CAMs = "all",
  plot_CAM = FALSE,
  useCoordinates = TRUE,
  relvertexsize = 3,
  reledgesize = 1
)

processing 22 CAMs... 
[1] "== ids_CAMs in drawnCAM"

### network indicators
## concept labels for which the concept-level ("micro") indicators are requested
tmp_microIndicator <- c(
  "BNE", "BNE-fächerbezogen", "BNE Relevanz",
  "Empirische Fachdidaktik", "Wissenschaft-Interesse", "Verantwortung"
)
networkIndicators_combined <- compute_indicatorsCAM(
  drawn_CAM = CAMdrawn_combined,
  micro_degree = tmp_microIndicator,
  micro_valence = tmp_microIndicator,
  micro_centr_clo = tmp_microIndicator,
  micro_transitivity = tmp_microIndicator,
  largestClique = FALSE
)


# wordlist
## NOTE(review): unlike the pre wordlist, useSummarized is not passed here, so
## the function default applies - confirm this is intended
CAMwordlist_combined <- create_wordlist(
  dat_nodes = CAMfiles_combined[[1]], dat_merged = CAMfiles_combined[[3]],
  order = "frequency", splitByValence = FALSE,
  comments = TRUE,
  raterSubsetWords = NULL, rater = FALSE
)

[1] "create_wordlist - use raw words"
[1] 0
[1] 398
[1] "temporarily suffixes are added, because not all words have been summarized"
processing 22 CAMs... 
[1] "== ids_CAMs in drawnCAM"

# Confirm that no (nearly) empty words are left in the wordlist; otherwise
# list every word that failed the check. The else branch uses <= 2 so that it
# is the exact complement of the "> 2" condition (two-character words were
# previously neither accepted nor reported).
if (all(nchar(CAMwordlist_combined$Words) > 2)) {
  print("sucessfully removed empty words")
} else {
  print(CAMwordlist_combined$Words[nchar(CAMwordlist_combined$Words) <= 2])
}

[1] ""

DT::datatable(CAMwordlist_combined, options = list(pageLength = 5))

identify types of changes (delta CAM)

!!! old approach

# backupIDs_post <- CAMfiles_post[[1]]$participantCAM
# CAMfiles_post[[1]]$participantCAM <- str_remove_all(string =  CAMfiles_post[[1]]$participantCAM, pattern = "_t2$")
### set A, B, C, D types
## Classify how each participant's CAM changed from pre to post by comparing
## the concept texts of both CAMs:
##   A: every post concept already exists in pre and post has fewer concepts
##   B: every pre concept still exists in post and post has more concepts
##      (the added concepts are collected in list_newWords_* / list_ids)
##   C: pre and post contain the same concept texts
##   D: pre has concepts missing in post AND post has concepts missing in pre
## Result: vec_type (one entry per participant, NA if no type applies).
ids_pre <- unique(CAMfiles_pre[[1]]$participantCAM)
ids_post <- unique(CAMfiles_post[[1]]$participantCAM)

## participants are paired by position, so both ID vectors must be identical;
## fail with a clear message instead of silently leaving vec_type undefined
if (length(ids_pre) != length(ids_post) || !isTRUE(all(ids_pre == ids_post))) {
  stop("participant IDs of pre and post CAMs differ (or are ordered differently)",
       call. = FALSE)
}

## preallocate so that an unclassified participant stays visible as NA
vec_type <- rep(NA_character_, length(ids_pre))
verbose <- FALSE

##
list_newWords_text <- list()
list_newWords_value <- list()
list_ids <- list()
h <- 1
for (i in seq_along(ids_pre)) {
  praeCAM <-
    CAMfiles_pre[[1]][CAMfiles_pre[[1]]$participantCAM == ids_pre[i], ]
  postCAM <-
    CAMfiles_post[[1]][CAMfiles_post[[1]]$participantCAM == ids_post[i], ]

  pre_in_post <- praeCAM$text %in% postCAM$text
  post_in_pre <- postCAM$text %in% praeCAM$text

  is_type <- c(
    A = all(post_in_pre) && length(postCAM$text) < length(praeCAM$text),
    B = all(pre_in_post) && length(postCAM$text) > length(praeCAM$text),
    C = all(pre_in_post) && all(post_in_pre),
    D = !all(pre_in_post) && !all(post_in_pre)
  )

  ## the four conditions are meant to be mutually exclusive
  if (sum(is_type) > 1) {
    stop("ERROR in (not exclusive logical condition) for participant i = ", i,
         ": check your data and adjust this function", call. = FALSE)
  }
  ## e.g. duplicated concept texts can make none of the conditions apply
  if (!any(is_type)) {
    warning("no change type could be assigned for participant i = ", i,
            call. = FALSE)
    next
  }

  vec_type[i] <- names(is_type)[is_type]
  if (verbose) {
    cat("\n i:", i, "type:", vec_type[i], "\n")
  }

  if (vec_type[i] == "B") {
    ## get words, values and ids of the newly added concepts
    is_new <- !post_in_pre
    list_newWords_text[[h]] <- postCAM$text[is_new]
    list_newWords_value[[h]] <- postCAM$value[is_new]
    list_ids[[h]] <- postCAM$id[is_new]

    h <- h + 1
  }
}


table(vec_type)

vec_type
B C D 
2 5 4

barplot(table(vec_type))

example for newly added words:

## distribution of the valence values of all newly added concepts
newWords_value <- unlist(list_newWords_value)
barplot(table(newWords_value))

# sort(table(unlist(list_newWords_text)))


### add data
# nrow(questionnaire); length(vec_type)
# questionnaire$typeChange <- vec_type


## one row per newly added concept: id, text and valence value
dat_newWords <- data.frame(
  id = unlist(list_ids),
  text = unlist(list_newWords_text),
  value = newWords_value
)
DT::datatable(dat_newWords, options = list(pageLength = 5))

merge and save all data

setwd("outputs/CAMs")


## Attach the CAM ID to the pre network indicators. This is only valid if the
## indicator rows list the participants in the same order as the nodes table.
## Lengths are compared first so that `==` cannot recycle silently, and a
## failed check is reported because later steps read the CAM_ID column.
ids_nodes_pre <- unique(CAMfiles_pre[[1]]$participantCAM)
if (length(ids_nodes_pre) == nrow(networkIndicators_pre) &&
    isTRUE(all(ids_nodes_pre == networkIndicators_pre$participantCAM))) {
  print("pre CAM ID can be set")
  networkIndicators_pre$CAM_ID <- unique(CAMfiles_pre[[1]]$CAM)
} else {
  warning("pre CAM ID was NOT set: participant IDs of nodes and network indicators differ",
          call. = FALSE)
}

[1] "pre CAM ID can be set"

# CAMfiles_post[[1]]$participantCAM <- backupIDs_post
## Attach the CAM ID to the post network indicators. This is only valid if the
## indicator rows list the participants in the same order as the nodes table.
## Lengths are compared first so that `==` cannot recycle silently, and a
## failed check is reported because later steps read the CAM_ID column.
ids_nodes_post <- unique(CAMfiles_post[[1]]$participantCAM)
if (length(ids_nodes_post) == nrow(networkIndicators_post) &&
    isTRUE(all(ids_nodes_post == networkIndicators_post$participantCAM))) {
  print("post CAM ID can be set")
  networkIndicators_post$CAM_ID <- unique(CAMfiles_post[[1]]$CAM)
} else {
  warning("post CAM ID was NOT set: participant IDs of nodes and network indicators differ",
          call. = FALSE)
}

[1] "post CAM ID can be set"

### remove all previously removed participants
length(unique(CAMfiles_combined[[1]]$participantCAM))

[1] 11

nrow(networkIndicators_pre)

[1] 11

nrow(networkIndicators_post)

[1] 11

nrow(merged_data)

[1] 11

## keep only indicator rows of participants still present in the combined CAMs
ids_kept <- CAMfiles_combined[[1]]$participantCAM
keep_pre <- networkIndicators_pre$participantCAM %in% ids_kept
keep_post <- networkIndicators_post$participantCAM %in% ids_kept
networkIndicators_pre <- networkIndicators_pre[keep_pre, ]
networkIndicators_post <- networkIndicators_post[keep_post, ]


nrow(networkIndicators_pre)

[1] 11

nrow(networkIndicators_post)

[1] 11

### match data
## Questionnaire data, pre / post network indicators and the combined CAM data
## are merged row by row. That is only valid if all four list the same
## participants in the same order, so this is checked first. Files are written
## relative to the current working directory.

## TRUE only if both ID vectors have the same length and identical entries
## (guards against silent recycling of `==`)
same_ids <- function(a, b) {
  length(a) == length(b) && isTRUE(all(a == b))
}

if (same_ids(merged_data$PROLIFIC_PID, networkIndicators_pre$participantCAM) &&
    same_ids(networkIndicators_pre$participantCAM,
             networkIndicators_post$participantCAM) &&
    same_ids(unique(CAMfiles_combined[[1]]$participantCAM),
             networkIndicators_post$participantCAM)) {
  print("all data can be matched row by row")


  # fix IDs
  networkIndicators_pre$participantCAM <- paste0(networkIndicators_pre$participantCAM, "_pre")
  networkIndicators_post$participantCAM <- paste0(networkIndicators_post$participantCAM, "_post")


  # save questionnaire
  merged_data$participantCAMpre <- networkIndicators_pre$participantCAM
  merged_data$participantCAMpost <- networkIndicators_post$participantCAM

  ## save as .xlsx file
  xlsx::write.xlsx2(x = merged_data, file = "merged_data_final.xlsx")
  ## save as R object
  saveRDS(merged_data, file = "merged_data_final.rds")


  # save network indicators pre
  ## save as .xlsx file
  xlsx::write.xlsx2(x = networkIndicators_pre, file = "networkIndicators_pre_final.xlsx")
  ## save as R object
  saveRDS(networkIndicators_pre, file = "networkIndicators_pre_final.rds")

  # save network indicators post
  ## save as .xlsx file
  xlsx::write.xlsx2(x = networkIndicators_post, file = "networkIndicators_post_final.xlsx")
  ## save as R object
  saveRDS(networkIndicators_post, file = "networkIndicators_post_final.rds")


  # save CAMfiles pre
  saveRDS(CAMfiles_pre, file = "CAMfiles_pre_final.rds")

  # save CAMfiles post
  saveRDS(CAMfiles_post, file = "CAMfiles_post_final.rds")

  # save CAMfiles combined and clean
  saveRDS(CAMfiles_combined, file = "CAMfiles_combined_final.rds")

  # save CAMfiles combined and clean
  # saveRDS(CAMfiles_combined_translated, file = "CAMfiles_combined_final_translated.rds")

  # save questionnaire combined with CAMs
  ## suffix all indicator columns so pre and post columns stay distinguishable
  colnames(networkIndicators_pre) <- paste0(colnames(networkIndicators_pre), "_pre")
  colnames(networkIndicators_post) <- paste0(colnames(networkIndicators_post), "_post")

  merged_dataCAMs <- cbind(merged_data, networkIndicators_pre, networkIndicators_post)

  ## save as .xlsx file
  xlsx::write.xlsx2(x = merged_dataCAMs, file = "merged_dataCAMs_final.xlsx")
  ## save as R object
  saveRDS(merged_dataCAMs, file = "merged_dataCAMs_final.rds")
} else {
  ## without this, merged_dataCAMs is never created and later steps fail with
  ## an unrelated "object not found" error
  stop("data cannot be matched row by row: participant IDs differ between ",
       "questionnaire, network indicators and combined CAM data",
       call. = FALSE)
}

[1] "all data can be matched row by row"

create data frames of concepts constant (C), deleted (D), new (N)

dat_pre_out <- data.frame()
dat_post_out <- data.frame()


for (i in 1:nrow(merged_dataCAMs)) {
  tmp_pre <-
    CAMfiles_combined[[1]][CAMfiles_combined[[1]]$CAM %in% merged_dataCAMs$CAM_ID_pre[i],]
  tmp_post <-
    CAMfiles_combined[[1]][CAMfiles_combined[[1]]$CAM %in% merged_dataCAMs$CAM_ID_post[i],]
  
  
  ## get date of concepts drawn by data collection tool (no interaction by user) - heuristic !!!:
  date_tmp <-
    tmp_post$date[tmp_post$date - min(tmp_post$date) <= .5] # less than half a second
  
  
  
  
  dat_pre <-
    tmp_pre[, c("participantCAM",
                "id" ,
                "text",
                "value",
                "comment",
                "date",
                "x_pos",
                "y_pos")]
  
  dat_post <-
    tmp_post[, c("participantCAM",
                 "id" ,
                 "text",
                 "value",
                 "comment",
                 "date",
                 "x_pos",
                 "y_pos")]
  
  
  ## set variables:
  if (nrow(dat_pre) >= 1) {
    ## indicate type of concept:
    dat_pre$typeConcept <- NA
    
    checkOldDeleted <- FALSE
  }
  
  
  if (nrow(dat_post) >= 1) {
    dat_post$originalConcept_date <- dat_post$date %in% date_tmp
    
    ## check if positions have changed - heuristic !!!:
    dat_post$originalConcept_position <- FALSE
    ## indicate if position was changed
    dat_post$changedPosition <- FALSE
    ## indicate if valence was changed
    dat_post$changedValence <- FALSE
    
    ## indicate type of concept:
    dat_post$typeConcept <- NA
    
    checkNewAdded <- FALSE
  }
  
  
  
  ## Compare pre- and post-intervention concepts; only possible if both data
  ## sets contain at least one concept.
  if (nrow(dat_pre) >= 1 && nrow(dat_post) >= 1) {
    #> through dat_pre
    for (p in seq_len(nrow(dat_pre))) {
      ## check position: find the post concepts located at exactly the
      ## coordinates of the current pre concept
      pos_tmp <- dat_pre[p, c("x_pos", "y_pos")]
      
      same_position <-
        dat_post$x_pos == pos_tmp$x_pos & dat_post$y_pos == pos_tmp$y_pos
      ## which() ignores NA comparisons; indexing the id column directly
      ## yields a plain vector for data.frames and tibbles alike
      matching_id <- dat_post$id[which(same_position)]
      
      ## %in% instead of ==: there can be zero, one or several matching ids,
      ## and == would recycle them against the id column (flagging the wrong
      ## rows) as soon as more than one concept shares the position
      dat_post$originalConcept_position[dat_post$id %in% matching_id] <-
        TRUE
      
      ## check valence: compare the pre value with the value of the post
      ## concept(s) carrying the same text
      ## NOTE(review): only the first text match is compared, but every
      ## post concept with that text gets flagged - confirm this is intended
      value_tmp <-
        dat_post$value[dat_post$text %in% dat_pre$text[p]]
      
      if (length(value_tmp) != 0) {
        if (dat_pre$value[p] != value_tmp[1]) {
          dat_post$changedValence[dat_post$text %in% dat_pre$text[p]] <- TRUE
        }
      }
      
      ## indicate type of concept: a pre concept whose text still exists
      ## after the intervention is "constant", otherwise it was "deleted"
      if (dat_pre$text[p] %in% dat_post$text) {
        dat_pre$typeConcept[p] <- "constant"
      } else {
        dat_pre$typeConcept[p] <- "deleted"
        # cat("\n deleted concepts:\n", "in i:", i, ", c:", c, "p:", p, "\n")
        checkOldDeleted <- TRUE
      }
    }
    
    #> through dat_post
    for (q in seq_len(nrow(dat_post))) {
      ## indicate type of concept: a post concept whose text already existed
      ## before the intervention is "constant", otherwise it is "new"
      if (dat_post$text[q] %in% dat_pre$text) {
        dat_post$typeConcept[q] <- "constant"
      } else {
        dat_post$typeConcept[q] <- "new"
        # cat("\n new concepts:\n", "in i:", i, ", c:", c, "p:", p, "\n")
        checkNewAdded <- TRUE
      }
      
      ## indicate if concept changed position: flagged as original by its
      ## date, but not found at any pre-intervention position
      if (dat_post$originalConcept_date[q] &&
          !dat_post$originalConcept_position[q]) {
        dat_post$changedPosition[q] <- TRUE
      }
    }
    
    
    ## debugging hook: concepts were both deleted and newly added
    if (all(checkOldDeleted, checkNewAdded)) {
      # cat("\n deleted concepts, new concepts:\n", "in i:", i, ", c:", c, "p:", p, "\n")
    }
  }
  
  
  
  
  ## indicate type of concept when one of the two data sets is empty:
  ## without any post concept every pre concept counts as "deleted",
  ## without any pre concept every post concept counts as "new".
  ## rep(..., nrow()) instead of a scalar keeps the assignment valid when
  ## both data sets are empty (a length-1 replacement on a zero-row
  ## data.frame raises an error).
  if (nrow(dat_post) == 0) {
    dat_pre$typeConcept <- rep("deleted", nrow(dat_pre))
  }
  
  if (nrow(dat_pre) == 0) {
    dat_post$typeConcept <- rep("new", nrow(dat_post))
  }
  
  ## safety net: abort as soon as a concept in either data set is left
  ## without a type, so unclassified rows never reach the output tables
  if (anyNA(dat_pre$typeConcept) || anyNA(dat_post$typeConcept)) {
    # NOTE(review): `c` and `p` are not set in the visible part of this
    # loop for this purpose (`p` is the last index of the pre loop) -
    # confirm they carry useful information here
    cat("\n NA typeConcept:\n", "in i:", i, ", c:", c, "p:", p, "\n")
    stop("typeConcept is NA for at least one concept", call. = FALSE)
  }
  
  ## append the concepts of the current data set to the overall tables
  dat_pre_out <- rbind(dat_pre_out, dat_pre)
  dat_post_out <- rbind(dat_post_out, dat_post)
}

# check if I missed any typeConcept
sum(is.na(dat_pre_out$typeConcept))

[1] 0

sum(is.na(dat_post_out$typeConcept))

[1] 0

# check if any data set was multiple times added
sum(table(dat_pre_out$id) >= 2)

[1] 0

sum(table(dat_post_out$id) >= 2)

[1] 0

# number of entries
nrow(dat_pre_out)

[1] 196

nrow(dat_post_out)

[1] 202

# nrow(dat_post_out) - 535 + 68

# types
table(dat_pre_out$typeConcept)


constant  deleted 
     178       18

table(dat_post_out$typeConcept)


constant      new 
     178       24

table(dat_post_out$changedPosition)


FALSE  TRUE 
  188    14

table(dat_post_out$changedValence)


FALSE  TRUE 
  199     3

## Export the pre- and post-intervention concept tables as Excel files.
## The target folder is addressed via file.path() instead of setwd(), so the
## working directory of the session is not changed as a side effect.
dir_CAM_concepts <- file.path("outputs", "CAM_concepts")
xlsx::write.xlsx2(
  x = dat_pre_out,
  file = file.path(dir_CAM_concepts, "concepts_preIntervention.xlsx")
)
xlsx::write.xlsx2(
  x = dat_post_out,
  file = file.path(dir_CAM_concepts, "concepts_postIntervention.xlsx")
)

Describe sample

pupils who participated:

psych::describe(x = questionnaire_c5$sociodemo_age)

   vars n  mean   sd median trimmed  mad min max range  skew kurtosis   se
X1    1 9 22.44 1.01     23   22.44 1.48  21  24     3 -0.19    -1.42 0.34

table(questionnaire_c5$sociodemo_gender)


female   male 
     8      1

table(questionnaire_c5$sociodemo_language)


 no yes 
  1   8

CAMs:

psych::describe(x = networkIndicators_combined[,c("mean_valence_macro", "num_nodes_macro", "num_edges_macro")])

                   vars  n  mean    sd median trimmed  mad min   max range skew
mean_valence_macro    1 22  0.80  0.60   0.74    0.77 0.72   0  1.85  1.85 0.27
num_nodes_macro       2 22 18.05  4.72  16.50   17.72 3.71  12 27.00 15.00 0.73
num_edges_macro       3 22 26.77 11.50  24.00   25.56 6.67  12 53.00 41.00 0.94
                   kurtosis   se
mean_valence_macro    -1.38 0.13
num_nodes_macro       -0.71 1.01
num_edges_macro       -0.13 2.45

psych::describe(x = networkIndicators_combined[,str_subset(string = colnames(networkIndicators_combined), pattern = "degreetot_micro")])

                                       vars  n mean   sd median trimmed  mad
degreetot_micro_BNE                       1 22 6.32 2.64    6.0    6.17 1.48
degreetot_micro_BNE-fächerbezogen         2 22 3.86 3.06    3.0    3.28 1.48
degreetot_micro_BNERelevanz               3 22 4.23 1.90    4.0    4.06 1.48
degreetot_micro_EmpirischeFachdidaktik    4 22 3.45 2.40    2.5    3.17 2.22
degreetot_micro_Wissenschaft-Interesse    5 22 3.50 2.67    2.0    3.11 1.48
degreetot_micro_Verantwortung             6 22 4.23 2.39    3.0    3.94 1.48
                                       min max range skew kurtosis   se
degreetot_micro_BNE                      2  12    10 0.69    -0.56 0.56
degreetot_micro_BNE-fächerbezogen        1  13    12 1.76     2.22 0.65
degreetot_micro_BNERelevanz              2  10     8 1.16     1.35 0.41
degreetot_micro_EmpirischeFachdidaktik   1   9     8 0.97    -0.25 0.51
degreetot_micro_Wissenschaft-Interesse   1  10     9 1.19     0.08 0.57
degreetot_micro_Verantwortung            2  10     8 0.82    -0.52 0.51

# Put the pre and post network indicators side by side in one wide table.
# cbind() pairs rows purely by position.
# NOTE(review): assumes both tables contain the same CAMs in the same row
# order - confirm before interpreting the plots below.
networkIndicators_combined_wide <- cbind(networkIndicators_pre, networkIndicators_post)


# Mean valence: pre (x axis) against post (y axis). The identity line
# (intercept 0, slope 1) marks "no change"; points above it have a higher
# value post than pre.
plot(networkIndicators_combined_wide$mean_valence_macro_pre, networkIndicators_combined_wide$mean_valence_macro_post)
abline(a=0, b=1)

# Number of nodes: pre (x axis) against post (y axis), again with the
# identity line as the "no change" reference.
plot(networkIndicators_combined_wide$num_nodes_macro_pre, networkIndicators_combined_wide$num_nodes_macro_post)
abline(a=0, b=1)

References

Peng, Roger D., and Elizabeth Matsui. 2016. The Art of Data Science: A Guide for Anyone Who Works with Data. Lulu.com. https://bookdown.org/rdpeng/artofdatascience/.

Wickham, Hadley, and Garrett Grolemund. 2017. R for Data Science: Import, Tidy, Transform, Visualize, and Model Data. O’Reilly Media, Inc. https://r4ds.had.co.nz/.

Xie, Yihui, J. J. Allaire, and Garrett Grolemund. 2018. R Markdown: The Definitive Guide. New York: Chapman and Hall/CRC. https://doi.org/10.1201/9781138359444.