# Global switch: when TRUE, the "save CAMs" chunks further below write every
# drawn CAM to disk as .png and .json files.
save_CAMs_as_pictures <- FALSE
# consider_Protocol = FALSE # not needed at current stage
Data Preparation
Background Information
This is an R Markdown document. Instructions for writing these documents and background information can be found in the book written by Xie, Allaire, and Grolemund (2018). When you execute code within the document, the results appear beneath the code. This file contains the summary statistics and the analysis steps (confirmatory and exploratory analyses). Files are split into multiple subfiles, such as data processing and data analysis steps, following the classical data-analysis pipeline (see Peng and Matsui 2016; Wickham and Grolemund 2017).
Global variables
create raw data files
# sets the directory of location of this script as the current directory
# setwd(dirname(rstudioapi::getSourceEditorContext()$path))
### load packages
# library() fails immediately with a clear error if pacman is not installed;
# require() would only return FALSE and let p_load() fail later.
library(pacman)
p_load('tidyverse', 'jsonlite', 'magrittr',
       'stargazer', 'psych', 'jtools', 'DT', 'igraph',
       'writexl')

### load socio-demographic data
setwd("data_demographic")
# two Prolific export files, stacked into one data frame
prolific1 <- read.csv(file = "prolific_export_682f1c80a267ba868f8a1af8.csv", header = TRUE)
prolific2 <- read.csv(file = "prolific_export_68261e4765055a12fd0d2dd9.csv", header = TRUE)
prolific <- rbind(prolific1, prolific2)
### list data files
setwd("../data")
# one "study_result*" folder per participant run
folders <- list.files(pattern = "^study_result.*")

### create data files - GERMANY
# Each target file is first (re)created with writeLines() and then opened in
# append mode, so the loop below can add one JSON line per participant.

# get CAM data
writeLines("", "CAMdata.txt") # create file
text_connection <- file("CAMdata.txt", "a") # open connection to append

# get CAM data second
writeLines("", "secondCAMdata.txt") # create file
text_connection_second <- file("secondCAMdata.txt", "a") # open connection to append

# get pre CAM data
writeLines("", "preCAM.txt") # create file
text_connection_pre <- file("preCAM.txt", "a") # open connection to append

# get post CAM data
writeLines("", "postCAM.txt") # create file
text_connection_post <- file("postCAM.txt", "a") # open connection to append

# get post second CAM data
writeLines("", "secondPostCAM.txt") # create file
text_connection_postSecond <- file("secondPostCAM.txt", "a") # open connection to append
# Walk through every study_result folder. Only folders that contain exactly
# five component sub-folders are processed; each component's "data.txt" is
# re-serialised as one JSON line into the matching text connection.
# seq_along() makes the loop a no-op when no folder was found.
for (i in seq_along(folders)) {
  setwd(folders[i])
  if (length(dir()) == 5) {
    # print(i)
    # pre CAM data
    setwd(dir()[1])
    tmp <- jsonlite::fromJSON(txt = "data.txt")
    # participant ID as entered in the pre-study component
    tmp_id <- tmp$ID[!is.na(tmp$ID)]
    writeLines(jsonlite::toJSON(x = tmp), text_connection_pre)
    setwd("..")

    # CAM data
    setwd(dir()[2])
    tmp <- jsonlite::fromJSON(txt = "data.txt")
    if (tmp$creator != tmp_id) {
      warning("IDs (primary keys) not matching")
    }
    writeLines(jsonlite::toJSON(x = tmp), text_connection)
    setwd("..")

    # post CAM data
    setwd(dir()[3])
    tmp <- jsonlite::fromJSON(txt = "data.txt")
    # attach the participant ID (second row only) so rows can be matched later
    tmp$ID <- NA
    tmp$ID[2] <- tmp_id
    writeLines(jsonlite::toJSON(x = tmp), text_connection_post)
    setwd("..")

    # CAM data second
    setwd(dir()[4])
    tmp <- jsonlite::fromJSON(txt = "data.txt")
    if (tmp$creator != tmp_id) {
      warning("IDs (primary keys) not matching")
    }
    # tmp$creator <- paste0(tmp$creator, "_t2")
    writeLines(jsonlite::toJSON(x = tmp), text_connection_second)
    setwd("..")

    # post CAM data second
    setwd(dir()[5])
    tmp <- jsonlite::fromJSON(txt = "data.txt")
    tmp$ID <- NA
    tmp$ID[2] <- tmp_id
    writeLines(jsonlite::toJSON(x = tmp), text_connection_postSecond)
    setwd("..")
  }
  setwd("..")
}
close(text_connection) # close connection CAM
close(text_connection_pre) # close connection
close(text_connection_post) # close connection
close(text_connection_second) # close connection CAM
close(text_connection_postSecond) # close connection
### move files to output folder
# copy files (not overwritten)
# NOTE(review): file.copy() does not overwrite by default and returns FALSE
# when the target already exists, while the file.remove() calls further below
# still delete the freshly created source files. Confirm that keeping the old
# files in "outputs" is intended.
tmp_file_from <- getwd()
setwd("../outputs")
file.copy(from = paste0(tmp_file_from, "/CAMdata.txt"), to = paste0(getwd(), "/CAMdata.txt"))
[1] FALSE
file.copy(from = paste0(tmp_file_from, "/preCAM.txt"), to = paste0(getwd(), "/preCAM.txt"))
[1] FALSE
file.copy(from = paste0(tmp_file_from, "/postCAM.txt"), to = paste0(getwd(), "/postCAM.txt"))
[1] FALSE
file.copy(from = paste0(tmp_file_from, "/secondPostCAM.txt"), to = paste0(getwd(), "/secondPostCAM.txt"))
[1] FALSE
file.copy(from = paste0(tmp_file_from, "/secondCAMdata.txt"), to = paste0(getwd(), "/secondCAMdata.txt"))
[1] FALSE
# remove files
file.remove(paste0(tmp_file_from, "/CAMdata.txt"))
[1] TRUE
file.remove(paste0(tmp_file_from, "/preCAM.txt"))
[1] TRUE
file.remove(paste0(tmp_file_from, "/postCAM.txt"))
[1] TRUE
file.remove(paste0(tmp_file_from, "/secondPostCAM.txt"))
[1] TRUE
file.remove(paste0(tmp_file_from, "/secondCAMdata.txt"))
[1] TRUE
### load functions
# Source every file found in the two function folders.
# seq_along() skips the loop when a folder is empty.
# print(getwd())
setwd("../functions")
for (i in seq_along(dir())) {
  # print(dir()[i])
  source(dir()[i], encoding = "utf-8")
}

setwd("../functions_CAMapp")
for (i in seq_along(dir())) {
  # print(dir()[i])
  source(dir()[i], encoding = "utf-8")
}
rm(i)
### summary function
#' Mean and standard error of one variable per group
#'
#' @param data A data frame.
#' @param varname Name (character) of the column to summarise.
#' @param groupnames Character vector with the names of the grouping columns.
#' @return A data frame with the grouping columns, a column named `varname`
#'   holding the group mean, and a column `se` holding the standard error.
data_summary <- function(data, varname, groupnames) {
  # plyr is called via its namespace instead of being attached with require():
  # attaching plyr after dplyr masks dplyr verbs such as summarise(), mutate()
  # and rename() for all code that runs afterwards.
  summary_func <- function(x, col) {
    values <- x[[col]]
    # the standard error uses the number of non-missing observations, which is
    # consistent with mean() and sd() dropping NAs
    n_obs <- sum(!is.na(values))
    c(mean = mean(values, na.rm = TRUE),
      se = sd(values, na.rm = TRUE) / sqrt(n_obs))
  }
  data_sum <- plyr::ddply(data, groupnames, .fun = summary_func, varname)
  plyr::rename(data_sum, c("mean" = varname))
}
rm(tmp); rm(tmp_id); rm(folders); rm(tmp_file_from)
rm(prolific1); rm(prolific2)
rm(text_connection); rm(text_connection_post); rm(text_connection_postSecond); rm(text_connection_pre); rm(text_connection_second)
merge CAM data sets
setwd("outputs")
# Read the contents of both files
cam_content <- readLines("CAMdata.txt")
second_cam_content <- readLines("secondCAMdata.txt")

# Create a new file and write combined contents (first CAMs, then second CAMs)
writeLines(c(cam_content, second_cam_content), "mergedCAMdata.txt")

rm(cam_content)
rm(second_cam_content)
set up data.frame questionnaires
load data:
setwd("outputs")

# Read a file that holds one JSON document per line and stack the parsed
# documents into a single data frame. Empty lines (and lone carriage returns)
# are dropped before parsing.
read_json_lines <- function(path) {
  suppressMessages({
    # ... split the file into lines ...
    json_lines <- str_split(read_file(path), '\n')[[1]]
    # ... filter empty rows ...
    json_lines <- json_lines[!json_lines %in% c('', '\r')]
    # ... parse JSON into a data.frame
    map_dfr(json_lines, fromJSON, flatten = TRUE)
  })
}

# > pre study
dat_preCAM <- read_json_lines('preCAM.txt')

# > post first CAM
dat_postCAM <- read_json_lines('postCAM.txt')

# > post second CAM
dat_secondPostCAM <- read_json_lines('secondPostCAM.txt')
rename ID variable for data sets:
colnames(dat_preCAM)[colnames(dat_preCAM) == "ID"] <- "PROLIFIC_PID"
colnames(dat_postCAM)[colnames(dat_postCAM) == "ID"] <- "PROLIFIC_PID"
colnames(dat_secondPostCAM)[colnames(dat_secondPostCAM) == "ID"] <- "PROLIFIC_PID"
add unique ID variable to match rows to participants, only keep complete data sets:
### create counter variable for both data sets
# Every data set is a stack of rows from all participants. A running counter
# is increased each time the marker screen of a data set is reached, so all
# rows from one marker up to the next share the same ID.
# seq_len() keeps the loops safe for empty data sets.

# pre study
dat_preCAM$ID <- NA

tmp_IDcounter <- 0
for (i in seq_len(nrow(dat_preCAM))) {
  if (!is.na(dat_preCAM$sender[i]) && dat_preCAM$sender[i] == "Greetings") {
    # tmp <- dat_preCAM$prolific_pid[i]
    tmp_IDcounter <- tmp_IDcounter + 1
  }
  dat_preCAM$ID[i] <- tmp_IDcounter
}

# post study
dat_postCAM$ID <- NA

tmp_IDcounter <- 0
for (i in seq_len(nrow(dat_postCAM))) {
  if (!is.na(dat_postCAM$sender[i]) && dat_postCAM$sender[i] == "CAMfeedbackGeneral") {
    # tmp <- dat_postCAM$prolific_pid[i]
    tmp_IDcounter <- tmp_IDcounter + 1
  }
  dat_postCAM$ID[i] <- tmp_IDcounter
}

# second post study
#> fix error in "sender variable": a missing sender that is directly followed
#> by a non-missing sender is labelled "adaptiveQuestion_Feedback"
for (i in seq_len(nrow(dat_secondPostCAM))) {
  if (is.na(dat_secondPostCAM$sender[i])) {
    # for the last row, sender[i + 1] is NA, so nothing is changed
    if (!is.na(dat_secondPostCAM$sender[i + 1])) {
      dat_secondPostCAM$sender[i] <- "adaptiveQuestion_Feedback"
    }
  }
}

dat_secondPostCAM$ID <- NA
tmp_IDcounter <- 0
for (i in seq_len(nrow(dat_secondPostCAM))) {
  if (!is.na(dat_secondPostCAM$sender[i]) && dat_secondPostCAM$sender[i] == "break500ms_2") {
    # tmp <- dat_secondPostCAM$prolific_pid[i]
    tmp_IDcounter <- tmp_IDcounter + 1
  }
  dat_secondPostCAM$ID[i] <- tmp_IDcounter
}
### keep only complete data sets
# pre-study
# sort(table(dat_preCAM$ID))
sum(table(dat_preCAM$ID) != max(table(dat_preCAM$ID)))
[1] 0
sum(table(dat_preCAM$ID) == max(table(dat_preCAM$ID)))
[1] 21
# keep only IDs that have the maximal number of rows (complete data sets)
dat_preCAM <- dat_preCAM[dat_preCAM$ID %in% names(table(dat_preCAM$ID))[table(dat_preCAM$ID) == max(table(dat_preCAM$ID))],]
# post-study
# sort(table(dat_postCAM$ID))
sum(table(dat_postCAM$ID) != max(table(dat_postCAM$ID)))
[1] 2
sum(table(dat_postCAM$ID) == max(table(dat_postCAM$ID)))
[1] 20
# dat_postCAM <- dat_postCAM[dat_postCAM$ID %in% names(table(dat_postCAM$ID))[table(dat_postCAM$ID) == max(table(dat_postCAM$ID))],]
# keep IDs with at least 4 rows
dat_postCAM <- dat_postCAM[dat_postCAM$ID %in% names(table(dat_postCAM$ID))[table(dat_postCAM$ID) >= 4],]
# post-study second
# sort(table(dat_secondPostCAM$ID))
sum(table(dat_secondPostCAM$ID) != max(table(dat_secondPostCAM$ID)))
[1] 0
sum(table(dat_secondPostCAM$ID) == max(table(dat_secondPostCAM$ID)))
[1] 21
# dat_secondPostCAM <- dat_secondPostCAM[dat_secondPostCAM$ID %in% names(table(dat_secondPostCAM$ID))[table(dat_secondPostCAM$ID) == max(table(dat_secondPostCAM$ID))],]
# keep IDs with at least 11 rows
dat_secondPostCAM <- dat_secondPostCAM[dat_secondPostCAM$ID %in% names(table(dat_secondPostCAM$ID))[table(dat_secondPostCAM$ID) >= 11],]
all(unique(dat_preCAM$ID) %in% unique(dat_postCAM$ID))
[1] TRUE
all(unique(dat_preCAM$ID) %in% unique(dat_secondPostCAM$ID))
[1] TRUE
questionnaire pre-study (component 1)
colnames(dat_preCAM)
[1] "sender" "sender_type"
[3] "sender_id" "6"
[5] "ended_on" "duration"
[7] "time_run" "time_render"
[9] "time_show" "time_end"
[11] "time_commit" "timestamp"
[13] "time_switch" "currentLocation"
[15] "choosenManipulationCheck" "IDtype"
[17] "PROLIFIC_PID" "para_countShowRights"
[19] "dummy_informedconsent" "commCheck"
[21] "sociodemo_age" "sociodemo_gender"
[23] "sociodemo_transport_mode" "sociodemo_transport_mode_other"
[25] "sociodemo_net_income" "meta.labjs_version"
[27] "meta.location" "meta.userAgent"
[29] "meta.platform" "meta.language"
[31] "meta.locale" "meta.timeZone"
[33] "meta.timezoneOffset" "meta.screen_width"
[35] "meta.screen_height" "meta.scroll_width"
[37] "meta.scroll_height" "meta.window_innerWidth"
[39] "meta.window_innerHeight" "meta.devicePixelRatio"
[41] "meta.labjs_build.flavor" "meta.labjs_build.commit"
[43] "para_defocuscount" "7"
[45] "ID"
# meta.* columns that are kept as non-numeric variables; the labjs and
# location columns are excluded
tmp_notNumeric <- str_subset(string = colnames(dat_preCAM), pattern = "^meta|^R")
tmp_notNumeric <- str_subset(string = tmp_notNumeric, pattern = "labjs|location", negate = TRUE)

# variables to extract from the pre-study data
vec_ques <- c("PROLIFIC_PID",
              "currentLocation", "IDtype",
              "choosenManipulationCheck",
              "para_countShowRights",
              "dummy_informedconsent",
              "commCheck",
              "sociodemo_age", "sociodemo_gender", "sociodemo_transport_mode", "sociodemo_transport_mode_other", "sociodemo_net_income",
              tmp_notNumeric)

# subset of vec_ques that must not be converted to numeric
vec_notNumeric <- c("PROLIFIC_PID", "currentLocation", "IDtype", "choosenManipulationCheck",
                    "sociodemo_gender", "sociodemo_transport_mode", "sociodemo_transport_mode_other", "sociodemo_net_income", tmp_notNumeric)

questionnaire_c1 <- questionnairetype(dataset = dat_preCAM,
                                      listvars = vec_ques,
                                      notNumeric = vec_notNumeric, verbose = FALSE)

questionnaire_c1$sociodemo_gender <- factor(questionnaire_c1$sociodemo_gender)
questionnaire_c1$sociodemo_net_income <- factor(questionnaire_c1$sociodemo_net_income)
questionnaire_c1$sociodemo_transport_mode <- factor(questionnaire_c1$sociodemo_transport_mode)
dim(questionnaire_c1)
[1] 21 26
colnames(questionnaire_c1)
[1] "ID" "PROLIFIC_PID"
[3] "currentLocation" "IDtype"
[5] "choosenManipulationCheck" "para_countShowRights"
[7] "dummy_informedconsent" "commCheck"
[9] "sociodemo_age" "sociodemo_gender"
[11] "sociodemo_transport_mode" "sociodemo_transport_mode_other"
[13] "sociodemo_net_income" "meta.userAgent"
[15] "meta.platform" "meta.language"
[17] "meta.locale" "meta.timeZone"
[19] "meta.timezoneOffset" "meta.screen_width"
[21] "meta.screen_height" "meta.scroll_width"
[23] "meta.scroll_height" "meta.window_innerWidth"
[25] "meta.window_innerHeight" "meta.devicePixelRatio"
questionnaire post-CAM (component 3)
colnames(dat_postCAM)
[1] "sender" "sender_type"
[3] "sender_id" "ended_on"
[5] "duration" "time_run"
[7] "time_render" "time_show"
[9] "time_end" "time_commit"
[11] "timestamp" "time_switch"
[13] "feedCAM_repres" "feedCAM_technicalprobs"
[15] "feedCAM_technicalprobsText" "feedCAM_already"
[17] "feedCAM_alreadyText" "18"
[19] "PROLIFIC_PID" "law_positive_negative"
[21] "law_fairness" "law_effectiveness"
[23] "law_acceptability" "law_petition"
[25] "law_demonstration" "law_demonstration_against"
[27] "scenario_thoughts" "para_countShowScenario"
[29] "para_defocuscount" "19"
[31] "ID"
# variables to extract from the post-CAM data
vec_ques <- c("PROLIFIC_PID",
              "feedCAM_repres",
              "feedCAM_technicalprobs", "feedCAM_technicalprobsText",
              "feedCAM_already", "feedCAM_alreadyText",
              "scenario_thoughts", "para_countShowScenario")

# subset of vec_ques that must not be converted to numeric
vec_notNumeric <- c("PROLIFIC_PID",
                    "feedCAM_technicalprobsText", "feedCAM_alreadyText", "scenario_thoughts")

questionnaire_c3 <- questionnairetype(dataset = dat_postCAM,
                                      listvars = vec_ques,
                                      notNumeric = vec_notNumeric, verbose = FALSE)
dim(questionnaire_c3)
[1] 21 9
colnames(questionnaire_c3)
[1] "ID" "PROLIFIC_PID"
[3] "feedCAM_repres" "feedCAM_technicalprobs"
[5] "feedCAM_technicalprobsText" "feedCAM_already"
[7] "feedCAM_alreadyText" "scenario_thoughts"
[9] "para_countShowScenario"
questionnaire post-second-CAM (component 5):
colnames(dat_secondPostCAM)
[1] "sender" "sender_type"
[3] "sender_id" "ended_on"
[5] "duration" "time_run"
[7] "time_render" "time_show"
[9] "time_end" "time_commit"
[11] "timestamp" "time_switch"
[13] "changeCAM_valence" "14"
[15] "PROLIFIC_PID" "law_positive_negative"
[17] "law_fairness" "law_effectiveness"
[19] "law_acceptability" "law_petition"
[21] "law_demonstration" "law_demonstration_against"
[23] "ease_mindmap" "ease_scenario"
[25] "guttman-item" "not_needed"
[27] "guttman-response" "feedback_critic"
[29] "ID"
# variables to extract from the post-second-CAM data
vec_ques <- c("PROLIFIC_PID",
              "changeCAM_valence",
              "ease_mindmap", "ease_scenario", "feedback_critic")

# subset of vec_ques that must not be converted to numeric
vec_notNumeric <- c("PROLIFIC_PID",
                    "changeCAM_valence",
                    "feedback_critic")

questionnaire_c5 <- questionnairetype(dataset = dat_secondPostCAM,
                                      listvars = vec_ques,
                                      notNumeric = vec_notNumeric, verbose = FALSE)
dim(questionnaire_c5)
[1] 21 6
colnames(questionnaire_c5)
[1] "ID" "PROLIFIC_PID" "changeCAM_valence"
[4] "ease_mindmap" "ease_scenario" "feedback_critic"
get ratings of law and guttman
# Pre-define the column names of the law_* and guttman* variables
law_cols <- sort(str_subset(string = colnames(dat_postCAM), pattern = "^law"))
guttman_cols <- sort(str_subset(string = colnames(dat_secondPostCAM), pattern = "^guttman"))

# to merge data sets: the guttman columns only exist in the second post data,
# so add them (filled with NA) to the first post data
for (tmp_col in guttman_cols) {
  dat_postCAM[[tmp_col]] <- NA
}
rm(tmp_col)

# Create an empty list to store processed data
result_list <- list()

dat_merged <- rbind(dat_postCAM[, c("ID", "PROLIFIC_PID", "sender", law_cols, guttman_cols)],
                    dat_secondPostCAM[, c("ID", "PROLIFIC_PID", "sender", law_cols, guttman_cols)])

# Loop over each unique ID
for (i in unique(dat_merged$ID)) {
  tmp <- dat_merged[dat_merged$ID == i, c("PROLIFIC_PID", "sender", law_cols, guttman_cols)]

  # Fill PROLIFIC_PID down (and then up) if missing
  tmp <- tmp %>%
    fill(PROLIFIC_PID, .direction = "downup")

  # Keep rows that hold at least one rating
  tmp <- tmp %>%
    filter(
      if_any(all_of(law_cols), ~ !is.na(.)) |     # at least one law_* value present OR
        if_any(all_of(guttman_cols), ~ !is.na(.)) # at least one guttman_* value present
    )

  # rows without a sender are labelled by their guttman item
  tmp$sender[is.na(tmp$sender)] <- paste0("GI_", tmp$`guttman-item`[!is.na(tmp$`guttman-item`)])

  # Append to result list
  result_list[[as.character(i)]] <- tmp
}
# Combine all into one dataframe
questionnaire_rating_long <- bind_rows(result_list)
questionnaire_rating_long$sender <- factor(questionnaire_rating_long$sender)

# law ratings as numbers
questionnaire_rating_long <- questionnaire_rating_long %>%
  mutate(across(starts_with("law_"), as.numeric))

# row-wise mean over all law_* columns (NA as soon as one rating is missing)
questionnaire_rating_long$law_mean <- rowMeans(x = questionnaire_rating_long[, str_subset(string = colnames(questionnaire_rating_long), pattern = "^law_")])

# short label per rating screen; rows of other senders keep NA
questionnaire_rating_long$naming <- NA
questionnaire_rating_long$naming[questionnaire_rating_long$sender == "policyRating_I"] <- "I"
questionnaire_rating_long$naming[questionnaire_rating_long$sender == "policyRating_I_Manipulation"] <- "Ib"
questionnaire_rating_long$naming[questionnaire_rating_long$sender == "policyRating_II"] <- "II"
# wide data set for law ratings: one row per participant, one column per
# rating variable and rating screen (I, Ib, II)
questionnaire_rating_wide_law <- questionnaire_rating_long[!is.na(questionnaire_rating_long$naming),] %>%
  pivot_wider(
    id_cols = PROLIFIC_PID,
    names_from = naming,
    values_from = c(law_acceptability, law_demonstration, law_demonstration_against, law_effectiveness, law_fairness, law_petition, law_mean),
    names_glue = "{.value}_{naming}"
  )

# wide data set for guttman ratings: one row per participant, one response
# column per guttman item
colnames(questionnaire_rating_long)[colnames(questionnaire_rating_long) == "guttman-item"] <- "item"
colnames(questionnaire_rating_long)[colnames(questionnaire_rating_long) == "guttman-response"] <- "response"

questionnaire_rating_wide_guttman <- questionnaire_rating_long[is.na(questionnaire_rating_long$naming),] %>%
  pivot_wider(
    id_cols = PROLIFIC_PID,
    names_from = item,
    values_from = response,
    names_glue = "{.value}_{item}"
  )
merge all data sets
# Start with the first dataset
merged_data <- questionnaire_c1

# Left join the others one by one
merged_data <- merged_data %>%
  left_join(questionnaire_c3, by = "PROLIFIC_PID") %>%
  left_join(questionnaire_c5, by = "PROLIFIC_PID") %>%
  left_join(questionnaire_rating_wide_law, by = "PROLIFIC_PID") %>%
  left_join(questionnaire_rating_wide_guttman, by = "PROLIFIC_PID")

# drop the suffixed duplicates of the counter ID created by the joins
merged_data$ID.x <- NULL
merged_data$ID.y <- NULL
dim(merged_data)
[1] 21 62
colnames(merged_data)
[1] "PROLIFIC_PID" "currentLocation"
[3] "IDtype" "choosenManipulationCheck"
[5] "para_countShowRights" "dummy_informedconsent"
[7] "commCheck" "sociodemo_age"
[9] "sociodemo_gender" "sociodemo_transport_mode"
[11] "sociodemo_transport_mode_other" "sociodemo_net_income"
[13] "meta.userAgent" "meta.platform"
[15] "meta.language" "meta.locale"
[17] "meta.timeZone" "meta.timezoneOffset"
[19] "meta.screen_width" "meta.screen_height"
[21] "meta.scroll_width" "meta.scroll_height"
[23] "meta.window_innerWidth" "meta.window_innerHeight"
[25] "meta.devicePixelRatio" "feedCAM_repres"
[27] "feedCAM_technicalprobs" "feedCAM_technicalprobsText"
[29] "feedCAM_already" "feedCAM_alreadyText"
[31] "scenario_thoughts" "para_countShowScenario"
[33] "ID" "changeCAM_valence"
[35] "ease_mindmap" "ease_scenario"
[37] "feedback_critic" "law_acceptability_I"
[39] "law_acceptability_Ib" "law_acceptability_II"
[41] "law_demonstration_I" "law_demonstration_Ib"
[43] "law_demonstration_II" "law_demonstration_against_I"
[45] "law_demonstration_against_Ib" "law_demonstration_against_II"
[47] "law_effectiveness_I" "law_effectiveness_Ib"
[49] "law_effectiveness_II" "law_fairness_I"
[51] "law_fairness_Ib" "law_fairness_II"
[53] "law_petition_I" "law_petition_Ib"
[55] "law_petition_II" "law_mean_I"
[57] "law_mean_Ib" "law_mean_II"
[59] "response_1" "response_4"
[61] "response_2" "response_3"
### add prolific data
# Align the Prolific export with merged_data by participant ID. match() keeps
# one export row per participant, in the row order of merged_data, and also
# copes with participants that appear more than once in the stacked export.
tmp_match <- match(merged_data$PROLIFIC_PID, prolific$Participant.id)
prolific <- prolific[tmp_match[!is.na(tmp_match)], ]
rm(tmp_match)

if (nrow(prolific) == nrow(merged_data) &&
    isTRUE(all(prolific$Participant.id == merged_data$PROLIFIC_PID))) {
  print("prolific data sucessfully added")

  merged_data$socio_age <- prolific$Age
  merged_data$socio_sex <- prolific$Sex
  merged_data$socio_ethnicity <- prolific$Ethnicity.simplified
  merged_data$socio_student <- prolific$Student.status
  merged_data$socio_employment <- prolific$Employment.status
  merged_data$socio_car <- prolific$Car.ownership
  # Time.taken is divided by 60 to obtain minutes
  merged_data$total_min_prolific <- prolific$Time.taken / 60

  ## all time outs to NA
  # merged_data$total_min_prolific[merged_data$total_min_prolific > 1000] <- NA
  ## all expired data to NA
  # merged_data[merged_data == "DATA_EXPIRED"] <- NA

  # non-numeric age entries become NA (with a coercion warning)
  merged_data$socio_age <- as.numeric(merged_data$socio_age)
} else {
  # make a failed match visible instead of silently skipping the variables
  warning("prolific data could not be matched to merged_data; no prolific variables were added")
}
[1] "prolific data sucessfully added"
Warning: NAs durch Umwandlung erzeugt
save all data sets
setwd("outputs/questionnaire")
# Save as .RData objects
save(questionnaire_c1, file = "questionnaire_c1.RData")
save(questionnaire_c3, file = "questionnaire_c3.RData")
save(questionnaire_c5, file = "questionnaire_c5.RData")
save(merged_data, file = "merged_data.RData")
save(questionnaire_rating_long, file = "questionnaire_rating_long.RData")
# Save as Excel files
write_xlsx(questionnaire_c1, "questionnaire_c1.xlsx")
write_xlsx(questionnaire_c3, "questionnaire_c3.xlsx")
write_xlsx(questionnaire_c5, "questionnaire_c5.xlsx")
write_xlsx(merged_data, "merged_data.xlsx")
write_xlsx(questionnaire_rating_long, "questionnaire_rating_long.xlsx")
get reaction times for single components
Plot time taken (in minutes) by participants for single components of study:
# Collect the duration (in seconds) of every screen for every participant.
# Participants are identified by the value of the counter ID, which is looked
# up in all three data sets, so the data sets stay aligned even if one of them
# lost a participant during filtering.
dat_duration <- data.frame(duration = NA, sender = NA, ID = NA, PROLIFIC_PID = NA)

tmp_ids <- unique(dat_secondPostCAM$ID)
tmp_list <- vector("list", length(tmp_ids))

for (i in seq_along(tmp_ids)) {
  tmp_PID <- dat_secondPostCAM$PROLIFIC_PID[dat_secondPostCAM$ID == tmp_ids[i] & !is.na(dat_secondPostCAM$PROLIFIC_PID)]

  # pre CAM
  tmp_preCAM <- data.frame(duration = dat_preCAM$duration[dat_preCAM$ID == tmp_ids[i]] / 1000,
                           sender = dat_preCAM$sender[dat_preCAM$ID == tmp_ids[i]])
  tmp_preCAM <- tmp_preCAM[!is.na(tmp_preCAM$sender),]

  # post CAM
  tmp_postCAM <- data.frame(duration = dat_postCAM$duration[dat_postCAM$ID == tmp_ids[i]] / 1000,
                            sender = dat_postCAM$sender[dat_postCAM$ID == tmp_ids[i]])
  tmp_postCAM <- tmp_postCAM[!is.na(tmp_postCAM$sender),]

  # second post CAM
  tmp_secondPostCAM <- data.frame(duration = dat_secondPostCAM$duration[dat_secondPostCAM$ID == tmp_ids[i]] / 1000,
                                  sender = dat_secondPostCAM$sender[dat_secondPostCAM$ID == tmp_ids[i]])
  tmp_secondPostCAM <- tmp_secondPostCAM[!is.na(tmp_secondPostCAM$sender),]

  tmp <- rbind(tmp_preCAM, tmp_postCAM, tmp_secondPostCAM)

  # ID stores the running participant index, as before
  tmp_list[[i]] <- data.frame(duration = tmp$duration,
                              sender = tmp$sender,
                              ID = rep(i, times = nrow(tmp)),
                              PROLIFIC_PID = rep(tmp_PID, times = nrow(tmp)))
}

# bind once after the loop instead of growing the data frame row block by
# row block; the NA template is kept when there is no participant at all
if (length(tmp_list) > 0) {
  dat_duration <- do.call(rbind, tmp_list)
}
rm(tmp_ids, tmp_list)
## remove empty sender
dat_duration <- dat_duration[!is.na(dat_duration$sender), ]
dat_duration <- dat_duration[!is.na(dat_duration$duration), ]

dat_duration$sender[dat_duration$sender == "done"] <- "CAM instructions"

## save as .xlsx
# write.xlsx2(x = dat_duration, file = "outputs/para_duration_singleComponents.xlsx")

#### plot
dat_duration$ID <- factor(dat_duration$ID)
# NOTE(review): geom_point() and geom_jitter() both draw every observation,
# so each duration appears twice in the figure - confirm this is intended.
p <- dat_duration %>%
  ggplot(aes(x = sender, y = duration, color = PROLIFIC_PID)) +
  geom_point() +
  geom_jitter(width = 0.15) +
  theme(axis.text.x = element_text(angle = 90)) + theme(legend.position = "none")
p

# Calculate the mean duration in seconds for each sender and sort by mean duration
tmp <- dat_duration %>%
  group_by(sender) %>%
  summarise(N = n(), mean_duration = mean(duration, na.rm = TRUE)) %>%
  arrange(desc(mean_duration))
DT::datatable(tmp, options = list(pageLength = 5))
set up CAM data
pre CAM (component 2)
Load CAM data
setwd("outputs")
# read the file with one JSON document (one CAM) per line
dat_CAM_pre <- suppressMessages(
  read_file("CAMdata.txt") %>%
    # ... split it into lines ...
    str_split('\n') %>% first() %>%
    # ... filter empty rows ...
    discard(function(x) x == '') %>%
    discard(function(x) x == '\r')
)

# parse every line into a list; lapply() also copes with an empty file
raw_CAM_pre <- lapply(dat_CAM_pre, function(x) jsonlite::fromJSON(txt = x))
Create CAM files, draw CAMs and compute network indicators
### create CAM single files (nodes, connectors, merged)
CAMfiles_pre <- create_CAMfiles(datCAM = raw_CAM_pre, reDeleted = TRUE)
Nodes and connectors, which were deleted by participants were removed.
# deleted nodes: 32
# deleted connectors: 18
# remove testing data sets
nrow(CAMfiles_pre[[1]])
[1] 306
# keep only rows whose participant ID has exactly 24 characters
# (presumably the length of a Prolific ID - TODO confirm)
CAMfiles_pre[[1]] <- CAMfiles_pre[[1]][nchar(CAMfiles_pre[[1]]$participantCAM) == 24,]
CAMfiles_pre[[2]] <- CAMfiles_pre[[2]][nchar(CAMfiles_pre[[2]]$participantCAM) == 24,]
CAMfiles_pre[[3]] <- CAMfiles_pre[[3]][nchar(CAMfiles_pre[[3]]$participantCAM.x) == 24,]
# number of rows left in the first CAM file after filtering
nrow(CAMfiles_pre[[1]])
[1] 306
# remove person who draw many empty concepts
# tmp_pid <- unique(CAMfiles_pre[[1]]$participantCAM[CAMfiles_pre[[1]]$CAM %in% c("a0c6edeb-267a-4f27-8199-79f896e033ce", "8d74f576-e617-4eb1-8ccf-93589ce6c65b")])
# print(tmp_pid)
## remove person from questionnaire data
# questionnaire <- questionnaire[!questionnaire$PROLIFIC_PID %in% tmp_pid,]
## remove person from CAM data
# table(CAMfiles_pre[[1]][CAMfiles_pre[[1]]$participantCAM %in% tmp_pid,]$text)
# CAMfiles_pre[[1]] <- CAMfiles_pre[[1]][!CAMfiles_pre[[1]]$participantCAM %in% tmp_pid,]
# CAMfiles_pre[[2]] <- CAMfiles_pre[[2]][!CAMfiles_pre[[2]]$participantCAM %in% tmp_pid,]
# CAMfiles_pre[[3]] <- CAMfiles_pre[[3]][!CAMfiles_pre[[3]]$participantCAM.x %in% tmp_pid,]
# remove 7 empty concepts:
# CAMfiles_pre[[1]]$text[nchar(CAMfiles_pre[[1]]$text) < 2]
# tmp_ids <- CAMfiles_pre[[1]]$id[nchar(CAMfiles_pre[[1]]$text) < 2]
# table(CAMfiles_pre[[1]]$isActive[CAMfiles_pre[[1]]$id %in% tmp_ids])
# table(CAMfiles_pre[[1]]$participantCAM[CAMfiles_pre[[1]]$id %in% tmp_ids])
#
# CAMfiles_pre[[1]] <- CAMfiles_pre[[1]][!CAMfiles_pre[[1]]$id %in% tmp_ids,]
### draw CAMs
CAMdrawn_pre <- draw_CAM(dat_merged = CAMfiles_pre[[3]],
                         dat_nodes = CAMfiles_pre[[1]], ids_CAMs = "all",
                         plot_CAM = FALSE,
                         useCoordinates = TRUE,
                         relvertexsize = 3,
                         reledgesize = 1)
processing 21 CAMs...
Warning: `graph.data.frame()` was deprecated in igraph 2.0.0.
ℹ Please use `graph_from_data_frame()` instead.
[1] "== participantCAM in drawnCAM"
for(i in 1:length(CAMdrawn_pre)){
if(any(nchar(V(CAMdrawn_pre[[i]])$label) < 3)){
print(V(CAMdrawn_pre[[i]])$label)
}
}
### network indicators
# concepts for which the micro indicators are requested
tmp_microIndicator <- c("Auto", "Fahrrad", "ÖPNV")
networkIndicators_pre <- compute_indicatorsCAM(drawn_CAM = CAMdrawn_pre,
                                               micro_degree = tmp_microIndicator,
                                               micro_valence = tmp_microIndicator,
                                               micro_centr_clo = tmp_microIndicator,
                                               micro_transitivity = tmp_microIndicator,
                                               largestClique = FALSE)
Warning: `graph.density()` was deprecated in igraph 2.0.0.
ℹ Please use `edge_density()` instead.
Warning: The `types1` argument of `assortativity()` is deprecated as of igraph 1.6.0.
ℹ Please use the `values` argument instead.
Warning: `assortativity.degree()` was deprecated in igraph 2.0.0.
ℹ Please use `assortativity_degree()` instead.
### wordlist
CAMwordlist_pre <- create_wordlist(
  dat_nodes = CAMfiles_pre[[1]],
  dat_merged = CAMfiles_pre[[3]],
  useSummarized = TRUE,
  order = "frequency",
  splitByValence = FALSE,
  comments = TRUE,
  raterSubsetWords = NULL,
  rater = FALSE
)
[1] "create_wordlist - use raw words"
[1] 0
[1] 306
[1] "temporarily suffixes are added, because not all words have been summarized"
processing 21 CAMs...
[1] "== participantCAM in drawnCAM"
if(all(nchar(CAMwordlist_pre$Words) > 2)){
print("sucessfully removed empty words")
}
[1] "sucessfully removed empty words"
DT::datatable(CAMwordlist_pre, options = list(pageLength = 5))
save CAMs as .json files, and as .png (igraph)
# Write every pre CAM to disk: one .png per drawn CAM and one .json per raw
# CAM with more than five nodes. Only runs when the global switch is TRUE.
if (save_CAMs_as_pictures) {
  setwd("outputs")
  setwd("savedCAMs_pre")
  setwd("png")
  ### remove all files if there are any
  if (length(list.files()) >= 1) {
    file.remove(list.files())
    cat('\n!\nall former .png files have been deleted')
  }

  ### if no participant ID was provided replace by randomly generated CAM ID
  if (all(CAMfiles_pre[[3]]$participantCAM.x == "noID")) {
    CAMfiles_pre[[3]]$participantCAM.x <- CAMfiles_pre[[3]]$CAM.x
  }

  ### save as .json files, and as .png (igraph)
  ids_CAMs <- unique(CAMfiles_pre[[3]]$participantCAM.x)

  for (i in seq_along(ids_CAMs)) {
    save_graphic(filename = paste0("CAM", "_t1_", ids_CAMs[i])) # paste0(ids_CAMs[i]))
    # pick the drawn CAM whose name equals the current participant ID
    CAM_igraph <- CAMdrawn_pre[[which(names(CAMdrawn_pre) == paste0(ids_CAMs[i]))]]
    plot(CAM_igraph, edge.arrow.size = .7,
         layout = layout_nicely, vertex.frame.color = "black", asp = .5, margin = -0.1,
         vertex.size = 10, vertex.label.cex = .9)
    dev.off()
  }

  setwd("../json")
  ### remove all files if there are any
  if (length(list.files()) >= 1) {
    file.remove(list.files())
    cat('\n!\nall former .json files have been deleted')
  }

  for (i in seq_along(raw_CAM_pre)) {
    if (!is_empty(raw_CAM_pre[[i]]$nodes)) {
      if (nrow(raw_CAM_pre[[i]]$nodes) > 5) {
        write(toJSON(raw_CAM_pre[[i]], encoding = "UTF-8"),
              paste0(raw_CAM_pre[[i]]$creator, ".json"))
      }
    }
  }
}
post CAM (component 4)
Load CAM data
setwd("outputs")
# read the file with one JSON document (one CAM) per line
dat_CAM_post <- suppressMessages(
  read_file("secondCAMdata.txt") %>%
    # ... split it into lines ...
    str_split('\n') %>% first() %>%
    # ... filter empty rows ...
    discard(function(x) x == '') %>%
    discard(function(x) x == '\r')
)

# parse every line into a list; lapply() also copes with an empty file
raw_CAM_post <- lapply(dat_CAM_post, function(x) jsonlite::fromJSON(txt = x))
Create CAM files, draw CAMs and compute network indicators
### create CAM single files (nodes, connectors, merged)
CAMfiles_post <- create_CAMfiles(datCAM = raw_CAM_post, reDeleted = TRUE)
Nodes and connectors, which were deleted by participants were removed.
# deleted nodes: 24
# deleted connectors: 17
# remove testing data sets
nrow(CAMfiles_post[[1]])
[1] 368
# keep only rows whose participant ID has at least 24 characters
# NOTE(review): the pre CAM section filters with == 24 - confirm that the
# difference is intended.
CAMfiles_post[[1]] <- CAMfiles_post[[1]][nchar(CAMfiles_post[[1]]$participantCAM) >= 24,]
CAMfiles_post[[2]] <- CAMfiles_post[[2]][nchar(CAMfiles_post[[2]]$participantCAM) >= 24,]
CAMfiles_post[[3]] <- CAMfiles_post[[3]][nchar(CAMfiles_post[[3]]$participantCAM.x) >= 24,]
# number of rows left in the first CAM file after filtering
nrow(CAMfiles_post[[1]])
[1] 368
# remove person who draw many empty concepts
# tmp_pid <- unique(CAMfiles_post[[1]]$participantCAM[CAMfiles_post[[1]]$CAM %in% c("a0c6edeb-267a-4f27-8199-79f896e033ce", "8d74f576-e617-4eb1-8ccf-93589ce6c65b")])
# print(tmp_pid)
## removed person already from questionnaire data
# ## remove person from CAM data
# table(CAMfiles_post[[1]][CAMfiles_post[[1]]$participantCAM %in% tmp_pid,]$text)
# CAMfiles_post[[1]] <- CAMfiles_post[[1]][!CAMfiles_post[[1]]$participantCAM %in% tmp_pid,]
# CAMfiles_post[[2]] <- CAMfiles_post[[2]][!CAMfiles_post[[2]]$participantCAM %in% tmp_pid,]
# CAMfiles_post[[3]] <- CAMfiles_post[[3]][!CAMfiles_post[[3]]$participantCAM.x %in% tmp_pid,]
#
#
# # removed 4 empty concepts:
# CAMfiles_post[[1]]$text[nchar(CAMfiles_post[[1]]$text) < 2 & CAMfiles_post[[1]]$text != "f"]
# tmp_ids <- CAMfiles_post[[1]]$id[nchar(CAMfiles_post[[1]]$text) < 2 & CAMfiles_post[[1]]$text != "f"]
# table(CAMfiles_post[[1]]$isActive[CAMfiles_post[[1]]$id %in% tmp_ids])
# table(CAMfiles_post[[1]]$participantCAM[CAMfiles_post[[1]]$id %in% tmp_ids])
#
#
# CAMfiles_post[[1]] <- CAMfiles_post[[1]][!CAMfiles_post[[1]]$id %in% tmp_ids,]
### draw CAMs
CAMdrawn_post <- draw_CAM(dat_merged = CAMfiles_post[[3]],
                          dat_nodes = CAMfiles_post[[1]], ids_CAMs = "all",
                          plot_CAM = FALSE,
                          useCoordinates = TRUE,
                          relvertexsize = 3,
                          reledgesize = 1)
processing 21 CAMs...
[1] "== participantCAM in drawnCAM"
### network indicators
# concepts for which the micro indicators are requested
tmp_microIndicator <- c("Auto", "Fahrrad", "ÖPNV")
networkIndicators_post <- compute_indicatorsCAM(drawn_CAM = CAMdrawn_post,
                                                micro_degree = tmp_microIndicator,
                                                micro_valence = tmp_microIndicator,
                                                micro_centr_clo = tmp_microIndicator,
                                                micro_transitivity = tmp_microIndicator,
                                                largestClique = FALSE)
# wordlist
# NOTE(review): unlike the pre CAM call, useSummarized is not passed here,
# so the function default applies - confirm this is intended.
CAMwordlist_post <- create_wordlist(
  dat_nodes = CAMfiles_post[[1]],
  dat_merged = CAMfiles_post[[3]],
  order = "frequency",
  splitByValence = FALSE,
  comments = TRUE,
  raterSubsetWords = NULL,
  rater = FALSE
)
[1] "create_wordlist - use raw words"
[1] 0
[1] 368
[1] "temporarily suffixes are added, because not all words have been summarized"
processing 21 CAMs...
[1] "== participantCAM in drawnCAM"
# Sanity check on the post word list: an entry counts as "empty" when it has
# fewer than two characters. The check and the report below now use the same
# threshold (previously `> 2` vs. `< 2`, so two-character words failed the
# check while nothing was reported).
if (all(nchar(CAMwordlist_post$Words) >= 2)) {
  print("sucessfully removed empty words")
} else {
  # show the offending (empty / one-character) entries
  CAMwordlist_post$Words[nchar(CAMwordlist_post$Words) < 2]
}
[1] ""
# interactive table of the post word list, five rows per page
DT::datatable(
  CAMwordlist_post,
  options = list(pageLength = 5)
)
save CAMs as .json files, and as .png (igraph)
# Export every post-intervention CAM as a .png (igraph plot) and every raw
# post CAM with more than five nodes as a .json file. Runs only when the
# global flag `save_CAMs_as_pictures` is TRUE.
if (save_CAMs_as_pictures) {
  # Remember where we started: the sections below use relative setwd() calls
  # and would fail if this block left the session inside outputs/.../json.
  dir_before_export <- getwd()

  setwd("outputs")
  setwd("savedCAMs_post")
  setwd("png")

  ### remove all files if there are any
  if (length(list.files()) >= 1) {
    file.remove(list.files())
    cat('\n!
all former .png files have been deleted')
  }

  ### if no participant ID was provided replace by randomly generated CAM ID
  if (all(CAMfiles_post[[3]]$participantCAM.x == "noID")) {
    CAMfiles_post[[3]]$participantCAM.x <- CAMfiles_post[[3]]$CAM.x
  }

  ### save as .json files, and as .png (igraph)
  ids_CAMs <- unique(CAMfiles_post[[3]]$participantCAM.x)

  # seq_along() instead of 1:length(): no iteration when there are no CAMs
  for (i in seq_along(ids_CAMs)) {
    # presumably opens the graphics device that dev.off() closes below
    save_graphic(filename = paste0("CAM", "_t2_", ids_CAMs[i])) # paste0(ids_CAMs[i]))

    # pick the drawn CAM whose name equals the current participant ID
    CAM_igraph <- CAMdrawn_post[[seq_along(CAMdrawn_post)[
      names(CAMdrawn_post) == paste0(ids_CAMs[i])]]]

    plot(CAM_igraph, edge.arrow.size = .7,
         layout = layout_nicely, vertex.frame.color = "black",
         asp = .5, margin = -0.1,
         vertex.size = 10, vertex.label.cex = .9)
    dev.off()
  }

  setwd("../json")

  ### remove all files if there are any
  if (length(list.files()) >= 1) {
    file.remove(list.files())
    cat('\n!
all former .json files have been deleted')
  }

  for (i in seq_along(raw_CAM_post)) {
    if (!is_empty(raw_CAM_post[[i]]$nodes)) {
      # only CAMs with more than five nodes are written
      if (nrow(raw_CAM_post[[i]]$nodes) > 5) {
        write(toJSON(raw_CAM_post[[i]], encoding = "UTF-8"),
              paste0(raw_CAM_post[[i]]$creator, ".json"))
      }
    }
  }

  # back to the directory the block was entered from
  setwd(dir_before_export)
}
merge CAM data
Load CAM data
setwd("outputs")

# Read the merged CAM export: one JSON document per line. Empty lines and
# bare carriage returns are dropped (the former third, duplicate filter for
# '' was redundant and has been removed).
dat_CAM_combined <- suppressMessages(
  read_file("mergedCAMdata.txt") %>%
    # ... split it into lines ...
    str_split('\n') %>%
    first() %>%
    # ... filter empty rows ...
    discard(function(x) x == '') %>%
    discard(function(x) x == '\r')
)

# Parse every line into an R object; seq_along() keeps the loop from running
# over c(1, 0) when the file holds no data.
raw_CAM_combined <- list()
for (i in seq_along(dat_CAM_combined)) {
  raw_CAM_combined[[i]] <- jsonlite::fromJSON(txt = dat_CAM_combined[[i]])
}
length(raw_CAM_combined)
[1] 42
Create CAM files, draw CAMs and compute network indicators
### create CAM single files (nodes, connectors, merged)
# reDeleted = TRUE: see the console message below, deleted nodes and
# connectors are removed.
CAMfiles_combined <- create_CAMfiles(
  datCAM = raw_CAM_combined,
  reDeleted = TRUE
)
Nodes and connectors, which were deleted by participants were removed.
# deleted nodes: 56
# deleted connectors: 35
# remove testing data sets
nrow(CAMfiles_combined[[1]])
[1] 674
# Keep only rows whose participant ID has at least 24 characters
# (presumably the length of a real participant ID; shorter IDs are treated
# as test runs).
CAMfiles_combined[[1]] <- CAMfiles_combined[[1]][
  nchar(CAMfiles_combined[[1]]$participantCAM) >= 24,
]
CAMfiles_combined[[2]] <- CAMfiles_combined[[2]][
  nchar(CAMfiles_combined[[2]]$participantCAM) >= 24,
]
CAMfiles_combined[[3]] <- CAMfiles_combined[[3]][
  nchar(CAMfiles_combined[[3]]$participantCAM.x) >= 24,
]
nrow(CAMfiles_combined[[1]])
[1] 674
# remove person who drew many empty concepts
# tmp_pid <- unique(CAMfiles_combined[[1]]$participantCAM[CAMfiles_combined[[1]]$CAM %in% c("a0c6edeb-267a-4f27-8199-79f896e033ce", "8d74f576-e617-4eb1-8ccf-93589ce6c65b")])
# print(tmp_pid)
## removed person already from questionnaire data
## remove person from CAM data
# table(CAMfiles_combined[[1]][CAMfiles_combined[[1]]$participantCAM %in% tmp_pid,]$text)
# CAMfiles_combined[[1]] <- CAMfiles_combined[[1]][!CAMfiles_combined[[1]]$participantCAM %in% tmp_pid,]
# CAMfiles_combined[[2]] <- CAMfiles_combined[[2]][!CAMfiles_combined[[2]]$participantCAM %in% tmp_pid,]
# CAMfiles_combined[[3]] <- CAMfiles_combined[[3]][!CAMfiles_combined[[3]]$participantCAM.x %in% tmp_pid,]
length(unique(CAMfiles_combined[[1]]$CAM))
[1] 42
# remove empty concepts:
# CAMfiles_combined[[1]]$text[nchar(CAMfiles_combined[[1]]$text) < 2 & CAMfiles_combined[[1]]$text != "f"]
# tmp_ids <- CAMfiles_combined[[1]]$id[nchar(CAMfiles_combined[[1]]$text) < 2 & CAMfiles_combined[[1]]$text != "f"]
# table(CAMfiles_combined[[1]]$isActive[CAMfiles_combined[[1]]$id %in% tmp_ids])
# table(CAMfiles_combined[[1]]$participantCAM[CAMfiles_combined[[1]]$id %in% tmp_ids])
#
# CAMfiles_combined[[1]] <- CAMfiles_combined[[1]][!CAMfiles_combined[[1]]$id %in% tmp_ids,]
### add protocol #
# if(consider_Protocol){
# setwd("outputs/01_dataPreperation")
#
# text <- readLines("protocol_after_word2vec.txt", warn = FALSE)
# text <- readLines(textConnection(text, encoding="UTF-8"), encoding="UTF-8")
#
# if (testIfJson(file = text)) {
# protocol <- rjson::fromJSON(file = "protocol_after_word2vec.txt")
#
# ## no CAM deleted
# # CAMfiles_combined[[1]] <- CAMfiles_combined[[1]][CAMfiles_combined[[1]]$CAM %in% protocol$currentCAMs,]
# # CAMfiles_combined[[2]] <- CAMfiles_combined[[2]][CAMfiles_combined[[2]]$CAM %in% protocol$currentCAMs,]
# # CAMfiles_combined[[3]] <- CAMfiles_combined[[3]][CAMfiles_combined[[3]]$CAM.x %in% protocol$currentCAMs,]
#
#
# tmp_out <- overwriteTextNodes(protocolDat = protocol,
# nodesDat = CAMfiles_combined[[1]])
# CAMfiles_combined[[1]] <- tmp_out[[1]]
# # tmp_out[[2]]
#
# } else{
# print("Invalid protocol uploaded")
# }
# }
# vec_CAMs <- c(); h = 1
# for(c in unique(CAMfiles_combined[[1]]$CAM)){
# tmp <- CAMfiles_combined[[1]][CAMfiles_combined[[1]]$CAM %in% c,]
#
# if(!(any(c("Rettungsroboter", "sozialer Assistenzroboter") %in% tmp$text) & all(c("Vorteile", "Nachteile") %in% tmp$text))){
# print(c)
# print(tmp$text)
# vec_CAMs[h] <- c
# h = h + 1
# # plot(CAMdrawn_combined[[c]])
# }
# }
## fix manually
# some predefined concepts were written incorrectly
## Soziale Assistenzroboter to sozialer Assistenzroboter
# CAMfiles_combined[[1]]$CAM[CAMfiles_combined[[1]]$participantCAM %in% "5debfbcc3a11682f0fae8b29" & CAMfiles_combined[[1]]$text == "Soziale Assistenzroboter"]
# vec_CAMs <- vec_CAMs[!vec_CAMs %in% CAMfiles_combined[[1]]$CAM[CAMfiles_combined[[1]]$participantCAM %in% "5debfbcc3a11682f0fae8b29" & CAMfiles_combined[[1]]$text == "Soziale Assistenzroboter"]]
#
# CAMfiles_combined[[1]]$text[CAMfiles_combined[[1]]$participantCAM %in% "5debfbcc3a11682f0fae8b29" & CAMfiles_combined[[1]]$text == "Soziale Assistenzroboter"] <- "Soziale Assistenzroboter"
# CAMfiles_combined[[1]]$text_summarized[CAMfiles_combined[[1]]$participantCAM %in% "5debfbcc3a11682f0fae8b29" & CAMfiles_combined[[1]]$text == "Soziale Assistenzroboter"] <- "Soziale Assistenzroboter_positive"
#
# ## Roboter to sozialer Rettungsroboter
# CAMfiles_combined[[1]]$CAM[CAMfiles_combined[[1]]$participantCAM %in% "5ba00acff337030001de805d" & CAMfiles_combined[[1]]$text == "Roboter"]
# vec_CAMs <- vec_CAMs[!vec_CAMs %in% CAMfiles_combined[[1]]$CAM[CAMfiles_combined[[1]]$participantCAM %in% "5ba00acff337030001de805d" & CAMfiles_combined[[1]]$text == "Roboter"]]
#
# CAMfiles_combined[[1]]$text[CAMfiles_combined[[1]]$participantCAM %in% "5ba00acff337030001de805d" & CAMfiles_combined[[1]]$text == "Roboter"] <- "Rettungsroboter"
# CAMfiles_combined[[1]]$text_summarized[CAMfiles_combined[[1]]$participantCAM %in% "5ba00acff337030001de805d" & CAMfiles_combined[[1]]$text == "Rettungsroboter"] <- "Rettungsroboter_neutral"
## remove 5 persons because of technical issues
# vec_Pids <- unique(CAMfiles_combined[[1]]$participantCAM[CAMfiles_combined[[1]]$CAM %in% vec_CAMs])
# vec_Pids
#
# # remove CAMs
# CAMfiles_combined[[1]] <- CAMfiles_combined[[1]][!CAMfiles_combined[[1]]$participantCAM %in% vec_Pids,]
# CAMfiles_combined[[2]] <- CAMfiles_combined[[2]][!CAMfiles_combined[[2]]$participantCAM %in% vec_Pids,]
# CAMfiles_combined[[3]] <- CAMfiles_combined[[3]][!CAMfiles_combined[[3]]$participantCAM.x %in% vec_Pids,]
# remove questionnaires
# questionnaire <- questionnaire[!questionnaire$PROLIFIC_PID %in% vec_Pids,]
# remove person "6560e6f734ae18bd18474cc9" -> only drew the pre-defined concepts
# for(c in unique(CAMfiles_combined[[1]]$participantCAM)){
# tmp <- CAMfiles_combined[[1]][CAMfiles_combined[[1]]$participantCAM %in% c,]
#
# if(sum(tmp$text %in% c("Vorteile", "Nachteile")) != 4){
# print(c)
# print(sum(tmp$text %in% c("Vorteile", "Nachteile")))
# print(tmp$text)
# }
# }
## removed person from questionnaire data
# questionnaire <- questionnaire[!questionnaire$PROLIFIC_PID %in% "6560e6f734ae18bd18474cc9",]
#
# ## remove person from CAM data
# CAMfiles_combined[[1]] <- CAMfiles_combined[[1]][!CAMfiles_combined[[1]]$participantCAM %in% "6560e6f734ae18bd18474cc9",]
# CAMfiles_combined[[2]] <- CAMfiles_combined[[2]][!CAMfiles_combined[[2]]$participantCAM %in% "6560e6f734ae18bd18474cc9",]
# CAMfiles_combined[[3]] <- CAMfiles_combined[[3]][!CAMfiles_combined[[3]]$participantCAM.x %in% "6560e6f734ae18bd18474cc9",]
# remove person "65304e8a630196510c79f7df" -> drew the concept "leer" multiple times
# for(c in unique(CAMfiles_combined[[1]]$CAM)){
# tmp <- CAMfiles_combined[[1]][CAMfiles_combined[[1]]$CAM == c,]
#
# if(any(table(tmp$text) >= 3)){
# print(c)
# print(sort(table(tmp$text)))
# }
# }
#
# # remove person who draw many empty concepts
# tmp_pid <- unique(CAMfiles_combined[[1]]$participantCAM[CAMfiles_combined[[1]]$CAM %in% c("503b3517-b003-48e5-b121-f48c9a64ecb6", "39e7d213-1276-4da8-99ea-5a13487874e7")])
# print(tmp_pid)
#
# ## remove person from questionnaire data
# questionnaire <- questionnaire[!questionnaire$PROLIFIC_PID %in% tmp_pid,]
#
# ## remove person from CAM data
# CAMfiles_combined[[1]] <- CAMfiles_combined[[1]][!CAMfiles_combined[[1]]$participantCAM %in% tmp_pid,]
# CAMfiles_combined[[2]] <- CAMfiles_combined[[2]][!CAMfiles_combined[[2]]$participantCAM %in% tmp_pid,]
# CAMfiles_combined[[3]] <- CAMfiles_combined[[3]][!CAMfiles_combined[[3]]$participantCAM.x %in% tmp_pid,]
### draw CAMs
# Build the drawn CAM objects for the combined (pre + post) data; nothing is
# plotted at this point (plot_CAM = FALSE).
CAMdrawn_combined <- draw_CAM(
  dat_merged = CAMfiles_combined[[3]],
  dat_nodes = CAMfiles_combined[[1]],
  ids_CAMs = "all",
  plot_CAM = FALSE,
  useCoordinates = TRUE,
  relvertexsize = 3,
  reledgesize = 1
)
processing 42 CAMs...
[1] "== ids_CAMs in drawnCAM"
### network indicators
# Concepts passed to every micro_* argument of compute_indicatorsCAM().
tmp_microIndicator <- c(
  "Rettungsroboter",
  "sozialer Assistenzroboter",
  "Vorteile",
  "Nachteile"
)
networkIndicators_combined <- compute_indicatorsCAM(
  drawn_CAM = CAMdrawn_combined,
  micro_degree = tmp_microIndicator,
  micro_valence = tmp_microIndicator,
  micro_centr_clo = tmp_microIndicator,
  micro_transitivity = tmp_microIndicator,
  largestClique = FALSE
)
# wordlist
# Word list of the combined CAMs, ordered by frequency and not split by
# valence.
CAMwordlist_combined <- create_wordlist(
  dat_nodes = CAMfiles_combined[[1]],
  dat_merged = CAMfiles_combined[[3]],
  order = "frequency",
  splitByValence = FALSE,
  comments = TRUE,
  raterSubsetWords = NULL,
  rater = FALSE
)
[1] "create_wordlist - use raw words"
[1] 0
[1] 674
[1] "temporarily suffixes are added, because not all words have been summarized"
processing 42 CAMs...
[1] "== ids_CAMs in drawnCAM"
# Sanity check on the combined word list: an entry counts as "empty" when it
# has fewer than two characters. The check and the report below now use the
# same threshold (previously `> 2` vs. `< 2`, so two-character words failed
# the check while nothing was reported).
if (all(nchar(CAMwordlist_combined$Words) >= 2)) {
  print("sucessfully removed empty words")
} else {
  # show the offending (empty / one-character) entries
  CAMwordlist_combined$Words[nchar(CAMwordlist_combined$Words) < 2]
}
[1] ""
# interactive table of the combined word list, five rows per page
DT::datatable(
  CAMwordlist_combined,
  options = list(pageLength = 5)
)
identify types of changes (delta CAM)
# backupIDs_post <- CAMfiles_post[[1]]$participantCAM
# CAMfiles_post[[1]]$participantCAM <- str_remove_all(string = CAMfiles_post[[1]]$participantCAM, pattern = "_t2$")
### set A, B, C, D types
# Classify each participant's change from pre to post CAM by comparing the
# concept texts of both node tables:
#   A: every post text exists in pre, and post has fewer concepts
#   B: every pre text exists in post, and post has more concepts
#      (the added texts, values and ids are collected in list_newWords_* / list_ids)
#   C: pre and post contain the same texts
#   D: some pre texts are missing in post AND some post texts are missing in pre
# Results: vec_type (one entry per participant, NA if no type matched),
# list_newWords_text, list_newWords_value, list_ids.
ids_pre <- unique(CAMfiles_pre[[1]]$participantCAM)
ids_post <- unique(CAMfiles_post[[1]]$participantCAM)

# compare lengths first so `==` never recycles over ID vectors of unequal length
if (length(ids_pre) == length(ids_post) && all(ids_pre == ids_post)) {
  # one slot per participant, so the vector length is fixed even if a
  # participant stays unclassified
  vec_type <- rep(NA_character_, length(ids_pre))
  error <- 0 # number of types matched for the current participant
  verbose <- FALSE

  ##
  list_newWords_text <- list()
  list_newWords_value <- list()
  list_ids <- list()
  h <- 1 # next free slot in the list_* collectors

  for (i in seq_along(ids_pre)) {
    praeCAM <-
      CAMfiles_pre[[1]][CAMfiles_pre[[1]]$participantCAM == ids_pre[i], ]
    postCAM <-
      CAMfiles_post[[1]][CAMfiles_post[[1]]$participantCAM == ids_post[i], ]

    ## to test:
    # praeCAM$text %in% postCAM$text
    # postCAM$text %in% praeCAM$text
    # length(praeCAM$text)
    # length(postCAM$text)
    # praeCAM$text
    # postCAM$text

    pre_in_post <- praeCAM$text %in% postCAM$text
    post_in_pre <- postCAM$text %in% praeCAM$text
    n_pre <- length(praeCAM$text)
    n_post <- length(postCAM$text)

    ## Typ A
    if (all(post_in_pre) && n_post < n_pre) {
      vec_type[i] <- "A"
      if (verbose) {
        cat("\n i:", i, "type:", vec_type[i], "\n")
      }
      error <- error + 1
    }

    ## Typ B
    if (all(pre_in_post) && n_post > n_pre) {
      vec_type[i] <- "B"
      if (verbose) {
        cat("\n i:", i, "type:", vec_type[i], "\n")
      }
      error <- error + 1

      ## get words and values
      list_newWords_text[[h]] <- postCAM$text[!post_in_pre]
      list_newWords_value[[h]] <- postCAM$value[!post_in_pre]
      list_ids[[h]] <- postCAM$id[!post_in_pre]
      h <- h + 1
    }

    ## Typ C
    if (all(pre_in_post) && all(post_in_pre)) {
      vec_type[i] <- "C"
      if (verbose) {
        cat("\n i:", i, "type:", vec_type[i], "\n")
      }
      error <- error + 1
    }

    ## Typ D
    # neither set of texts is contained in the other
    if (sum(pre_in_post) < n_pre && sum(post_in_pre) < n_post) {
      vec_type[i] <- "D"
      if (verbose) {
        cat("\n i:", i, "type:", vec_type[i], "\n")
      }
      error <- error + 1
    }

    if (error > 1) {
      # the index goes into the message itself; print(msg, i) treated i as
      # the `digits` argument and never displayed it
      stop("ERROR in (not exclusive logical condition) for i = ", i,
           ": check your data and adjust this function")
    }
    if (error == 0) {
      # possible with duplicated concept texts; keep NA and make it visible
      warning("no change type (A-D) matched for i = ", i,
              "; vec_type[i] is left NA")
    }
    error <- 0
  }
}
table(vec_type)
vec_type
B C D
11 3 7
barplot(table(vec_type))
example for newly added words:
barplot(table(unlist(list_newWords_value)))
# sort(table(unlist(list_newWords_text)))
### add data
# nrow(questionnaire); length(vec_type)
# questionnaire$typeChange <- vec_type
# One row per newly added concept: flatten the per-participant collectors
# into parallel columns.
dat_newWords <- data.frame(
  id = unlist(list_ids),
  text = unlist(list_newWords_text),
  value = unlist(list_newWords_value)
)
DT::datatable(
  dat_newWords,
  options = list(pageLength = 5)
)
merge and save all data
setwd("outputs/CAMs")
# Attach the CAM IDs to the pre network indicators, but only when the
# participants appear in the same order in both objects.
if (all(
  unique(CAMfiles_pre[[1]]$participantCAM) ==
    networkIndicators_pre$participantCAM
)) {
  print("pre CAM ID can be set")
  networkIndicators_pre$CAM_ID <- unique(CAMfiles_pre[[1]]$CAM)
}
[1] "pre CAM ID can be set"
# CAMfiles_post[[1]]$participantCAM <- backupIDs_post
# Attach the CAM IDs to the post network indicators, but only when the
# participants appear in the same order in both objects.
if (all(
  unique(CAMfiles_post[[1]]$participantCAM) ==
    networkIndicators_post$participantCAM
)) {
  print("post CAM ID can be set")
  networkIndicators_post$CAM_ID <- unique(CAMfiles_post[[1]]$CAM)
}
[1] "post CAM ID can be set"
### remove all previously removed participants
length(unique(CAMfiles_combined[[1]]$participantCAM))
[1] 21
nrow(networkIndicators_pre)
[1] 21
nrow(networkIndicators_post)
[1] 21
nrow(merged_data)
[1] 21
# Keep only indicator rows of participants still present in the combined
# node table.
networkIndicators_pre <- networkIndicators_pre[
  networkIndicators_pre$participantCAM %in%
    CAMfiles_combined[[1]]$participantCAM,
]
networkIndicators_post <- networkIndicators_post[
  networkIndicators_post$participantCAM %in%
    CAMfiles_combined[[1]]$participantCAM,
]
nrow(networkIndicators_pre)
[1] 21
nrow(networkIndicators_post)
[1] 21
### match data
# Everything below runs only when questionnaire data, pre indicators, post
# indicators and the combined node table list the participants in the same
# order, so that rows can be bound side by side.
if (
  all(merged_data$PROLIFIC_PID == networkIndicators_pre$participantCAM) &&
    all(networkIndicators_pre$participantCAM ==
          networkIndicators_post$participantCAM) &&
    all(unique(CAMfiles_combined[[1]]$participantCAM) ==
          networkIndicators_post$participantCAM)
) {
  print("all data can be matched row by row")

  # fix IDs: make pre / post IDs distinguishable
  networkIndicators_pre$participantCAM <-
    paste0(networkIndicators_pre$participantCAM, "_pre")
  networkIndicators_post$participantCAM <-
    paste0(networkIndicators_post$participantCAM, "_post")

  # save questionnaire (with both suffixed IDs attached)
  merged_data$participantCAMpre <- networkIndicators_pre$participantCAM
  merged_data$participantCAMpost <- networkIndicators_post$participantCAM
  ## save as .xlsx file
  xlsx::write.xlsx2(x = merged_data, file = "merged_data_final.xlsx")
  ## save as R object
  saveRDS(merged_data, file = "merged_data_final.rds")

  # save network indicators pre
  ## save as .xlsx file
  xlsx::write.xlsx2(
    x = networkIndicators_pre,
    file = "networkIndicators_pre_final.xlsx"
  )
  ## save as R object
  saveRDS(networkIndicators_pre, file = "networkIndicators_pre_final.rds")

  # save network indicators post
  ## save as .xlsx file
  xlsx::write.xlsx2(
    x = networkIndicators_post,
    file = "networkIndicators_post_final.xlsx"
  )
  ## save as R object
  saveRDS(networkIndicators_post, file = "networkIndicators_post_final.rds")

  # save CAMfiles pre
  saveRDS(CAMfiles_pre, file = "CAMfiles_pre_final.rds")
  # save CAMfiles post
  saveRDS(CAMfiles_post, file = "CAMfiles_post_final.rds")
  # save CAMfiles combined and clean
  saveRDS(CAMfiles_combined, file = "CAMfiles_combined_final.rds")
  # save CAMfiles combined and clean
  # saveRDS(CAMfiles_combined_translated, file = "CAMfiles_combined_final_translated.rds")

  # save questionnaire combined with CAMs:
  # suffix the indicator columns so pre and post columns stay distinct
  colnames(networkIndicators_pre) <-
    paste0(colnames(networkIndicators_pre), "_pre")
  colnames(networkIndicators_post) <-
    paste0(colnames(networkIndicators_post), "_post")
  merged_dataCAMs <- cbind(
    merged_data,
    networkIndicators_pre,
    networkIndicators_post
  )
  dim(merged_dataCAMs)
  ## save as .xlsx file
  xlsx::write.xlsx2(x = merged_dataCAMs, file = "merged_dataCAMs_final.xlsx")
  ## save as R object
  saveRDS(merged_dataCAMs, file = "merged_dataCAMs_final.rds")
}
[1] "all data can be matched row by row"
create data frames of concepts constant (C), deleted (D), new (N)
# For every participant row in merged_dataCAMs, label each concept of the pre
# CAM as "constant" / "deleted" and each concept of the post CAM as
# "constant" / "new" (matching is done on the concept text), and flag post
# concepts whose position or valence changed. Results are stacked into
# dat_pre_out and dat_post_out.
dat_pre_out <- data.frame()
dat_post_out <- data.frame()

# columns of the node table carried into the output
cols_concepts <- c("participantCAM", "id", "text", "value",
                   "comment", "date", "x_pos", "y_pos")

# seq_len() instead of 1:nrow(): no iteration when merged_dataCAMs is empty
for (i in seq_len(nrow(merged_dataCAMs))) {
  tmp_pre <-
    CAMfiles_combined[[1]][CAMfiles_combined[[1]]$CAM %in% merged_dataCAMs$CAM_ID_pre[i], ]
  tmp_post <-
    CAMfiles_combined[[1]][CAMfiles_combined[[1]]$CAM %in% merged_dataCAMs$CAM_ID_post[i], ]

  ## get date of concepts drawn by data collection tool (no interaction by user) - heuristic !!!:
  # NOTE(review): the .5 threshold is described as "half a second"; this
  # assumes `date` is measured in seconds - confirm.
  if (nrow(tmp_post) >= 1) {
    date_tmp <-
      tmp_post$date[tmp_post$date - min(tmp_post$date) <= .5] # less than half a second
  } else {
    # no post concepts: avoid min() on an empty vector (warning + Inf)
    date_tmp <- tmp_post$date
  }

  dat_pre <- tmp_pre[, cols_concepts]
  dat_post <- tmp_post[, cols_concepts]

  ## set variables:
  if (nrow(dat_pre) >= 1) {
    ## indicate type of concept:
    dat_pre$typeConcept <- NA

    # kept as script-level flag (only set here and in the loop below)
    checkOldDeleted <- FALSE
  }

  if (nrow(dat_post) >= 1) {
    dat_post$originalConcept_date <- dat_post$date %in% date_tmp

    ## check if positions have changed - heuristic !!!:
    dat_post$originalConcept_position <- FALSE
    ## indicate if position was changed
    dat_post$changedPosition <- FALSE
    ## indicate if valence was changed
    dat_post$changedValence <- FALSE

    ## indicate type of concept:
    dat_post$typeConcept <- NA

    # kept as script-level flag (only set here and in the loop below)
    checkNewAdded <- FALSE
  }

  ## loop through if both data sets exists
  if (nrow(dat_pre) >= 1 && nrow(dat_post) >= 1) {
    #> through dat_pre
    for (p in seq_len(nrow(dat_pre))) {
      ## check position:
      pos_tmp <- dat_pre[p, c("x_pos", "y_pos")]

      matching_id <-
        dat_post[dat_post$x_pos == pos_tmp$x_pos &
                   dat_post$y_pos == pos_tmp$y_pos, "id"]

      # %in% instead of ==: several post concepts may sit on the same
      # coordinates, and == would recycle matching_id against the id column
      dat_post$originalConcept_position[dat_post$id %in% matching_id] <- TRUE

      ## check valence (compared against the first post concept with the same text)
      value_tmp <- dat_post$value[dat_post$text %in% dat_pre$text[p]]

      if (length(value_tmp) != 0) {
        if (dat_pre$value[p] != value_tmp[1]) {
          dat_post$changedValence[dat_post$text %in% dat_pre$text[p]] <- TRUE
        }
      }

      ## indicate type of concept:
      if (dat_pre$text[p] %in% dat_post$text) {
        dat_pre$typeConcept[p] <- "constant"
      } else {
        dat_pre$typeConcept[p] <- "deleted"
        checkOldDeleted <- TRUE
      }
    }

    #> through dat_post
    for (q in seq_len(nrow(dat_post))) {
      ## indicate type of concept:
      if (dat_post$text[q] %in% dat_pre$text) {
        dat_post$typeConcept[q] <- "constant"
      } else {
        dat_post$typeConcept[q] <- "new"
        checkNewAdded <- TRUE
      }

      ## indicate if concept changed position: it carries an initial
      ## timestamp but no pre concept sits on its coordinates
      if (dat_post$originalConcept_date[q] &&
          !dat_post$originalConcept_position[q]) {
        dat_post$changedPosition[q] <- TRUE
      }
    }
  }

  ## indicate type of concept when one of the two CAMs is empty
  ## (guarded: assigning a column to a zero-row data frame is an error)
  if (nrow(dat_post) == 0 && nrow(dat_pre) >= 1) {
    dat_pre$typeConcept <- "deleted"
  }

  if (nrow(dat_pre) == 0 && nrow(dat_post) >= 1) {
    dat_post$typeConcept <- "new"
  }

  if (any(is.na(dat_post$typeConcept))) {
    # the former diagnostic passed the base function `c` (and a possibly
    # undefined `p`) to cat(), which failed before stop() was reached
    stop("NA typeConcept in post CAM for i = ", i)
  }

  dat_pre_out <- rbind(dat_pre_out, dat_pre)
  dat_post_out <- rbind(dat_post_out, dat_post)
}
# check if I missed any typeConcept
sum(is.na(dat_pre_out$typeConcept))
[1] 0
sum(is.na(dat_post_out$typeConcept))
[1] 0
# check if any data set was multiple times added
sum(table(dat_pre_out$id) >= 2)
[1] 0
sum(table(dat_post_out$id) >= 2)
[1] 0
# number of entries
nrow(dat_pre_out)
[1] 306
nrow(dat_post_out)
[1] 368
# nrow(dat_post_out) - 535 + 68
# types
table(dat_pre_out$typeConcept)
constant deleted
296 10
table(dat_post_out$typeConcept)
constant new
297 71
table(dat_post_out$changedPosition)
FALSE TRUE
306 62
table(dat_post_out$changedValence)
FALSE TRUE
343 25
setwd("outputs/CAM_concepts")
# write the concept-level tables (pre and post intervention) as .xlsx files
xlsx::write.xlsx2(
  x = dat_pre_out,
  file = "concepts_preIntervention.xlsx"
)
xlsx::write.xlsx2(
  x = dat_post_out,
  file = "concepts_postIntervention.xlsx"
)