Data preparation part II of basal attributes article
1 Notes
2 global variables
Define your global variables (can take some time to run):
save_CAMs_as_pictures = TRUE # TRUE = save CAMs as .json, .png files
plot_deleted_CAMs = TRUE
3 create raw data files
# sets the directory of this script as the current working directory
# setwd(dirname(rstudioapi::getSourceEditorContext()$path))
### load packages
# devtools::install_github('jdwilson4/multilayer_extraction')
require(pacman)
p_load('tidyverse', 'jsonlite', 'magrittr', 'xlsx',
       'stargazer', 'psych', 'jtools', 'DT', 'ggstatsplot',
       'lavaan',
       'regsem', 'MplusAutomation', 'igraph', 'ggplot2', 'tidyLPA', 'MultilayerExtraction',
       'Matrix', 'foreach', 'doParallel', 'parallel',
       'R.matlab')
### load socio-demographic data
setwd("data demographic")
prolific <- read.csv(file = "prolific_export_65d4744b1373145896174eba.csv", header = TRUE)

setwd("../data final list basal attributes")
listBasalAttributes <- read.xlsx2(file = "final list basal attributes.xlsx", sheetIndex = 1)
listBasalAttributes$German.Translation[listBasalAttributes$German.Translation == "bio-inspiriert"] <- "bioinspiriert"
listBasalAttributes$German.Translation[listBasalAttributes$German.Translation == "Energie generierend"] <- "Energie generierend"
listBasalAttributes$German.Translation[listBasalAttributes$German.Translation == "Energie speichernd"] <- "Energie speichernd"
### list data files
setwd("../data")
folders <- list.files(pattern = "^study_result.*")

### create data files
### get CAM data
writeLines("", "CAMdata.txt") # create file
text_connection <- file("CAMdata.txt", "a") # open connection to append

### get pre CAM data
writeLines("", "preCAM.txt") # create file
text_connection_pre <- file("preCAM.txt", "a") # open connection to append

### get post CAM data
writeLines("", "postCAM.txt") # create file
text_connection_post <- file("postCAM.txt", "a") # open connection to append
for(i in 1:length(folders)){
  setwd(folders[i])

  if(length(dir()) == 3){
    # print(i)
    ### CAM data
    setwd(dir()[2])
    tmp <- jsonlite::fromJSON(txt = "data.txt")
    writeLines(jsonlite::toJSON(x = tmp), text_connection)
    setwd("..")

    ### pre CAM data
    setwd(dir()[1])
    tmp <- jsonlite::fromJSON(txt = "data.txt")
    writeLines(jsonlite::toJSON(x = tmp), text_connection_pre)
    setwd("..")

    ### post CAM data
    setwd(dir()[3])
    tmp <- jsonlite::fromJSON(txt = "data.txt")
    writeLines(jsonlite::toJSON(x = tmp), text_connection_post)
    setwd("..")
  }

  setwd("..")
}
close(text_connection) # close connection CAM
close(text_connection_pre) # close connection
close(text_connection_post) # close connection
### move files to output folder
# copy files (not overwritten)
tmp_file_from <- getwd()
setwd("../outputs/01_dataPreperation")
file.copy(from = paste0(tmp_file_from, "/CAMdata.txt"), to = paste0(getwd(), "/CAMdata.txt"))
[1] FALSE
file.copy(from = paste0(tmp_file_from, "/preCAM.txt"), to = paste0(getwd(), "/preCAM.txt"))
[1] FALSE
file.copy(from = paste0(tmp_file_from, "/postCAM.txt"), to = paste0(getwd(), "/postCAM.txt"))
[1] FALSE
### remove files
file.remove(paste0(tmp_file_from, "/CAMdata.txt"))
[1] TRUE
file.remove(paste0(tmp_file_from, "/preCAM.txt"))
[1] TRUE
file.remove(paste0(tmp_file_from, "/postCAM.txt"))
[1] TRUE
### load functions
setwd("../../functions")
for(i in 1:length(dir())){
# print(dir()[i])
source(dir()[i], encoding = "utf-8")
}
setwd("../functions_CAMapp")
for(i in 1:length(dir())){
# print(dir()[i])
source(dir()[i], encoding = "utf-8")
}
rm(i)
### summary function
data_summary <- function(data, varname, groupnames){
  require(plyr)
  summary_func <- function(x, col){
    c(mean = mean(x[[col]], na.rm = TRUE),
      se = sd(x[[col]], na.rm = TRUE) / sqrt(length(x[[col]])))
  }
  data_sum <- ddply(data, groupnames, .fun = summary_func, varname)
  data_sum <- plyr::rename(data_sum, c("mean" = varname))
  return(data_sum)
}
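The data_summary() helper computes a group-wise mean and standard error and is kept here for reuse. For reference, a minimal usage sketch on R's built-in iris data (a hypothetical example, not part of the analysis run):
## minimal usage sketch: mean and standard error of Sepal.Length per Species
## (the "mean" column is renamed to the requested variable name)
data_summary(data = iris, varname = "Sepal.Length", groupnames = "Species")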
4 set up data.frame questionnaires
setwd("outputs/01_dataPreperation")
# > pre study
suppressMessages(
  read_file('preCAM.txt') %>%
    # ... split it into lines ...
    str_split('\n') %>% first() %>%
    # ... filter empty rows ...
    discard(function(x)
      x == '') %>%
    discard(function(x)
      x == '\r') %>%
    # ... parse JSON into a data.frame
    map_dfr(fromJSON, flatten = TRUE) -> dat_preCAM
)

# > post CAM
read_file('postCAM.txt') %>%
  # ... split it into lines ...
  str_split('\n') %>% first() %>%
  # ... filter empty rows ...
  discard(function(x)
    x == '') %>%
  discard(function(x)
    x == '\r') -> tmp_postCAM

tmp_out <- list()
for (i in 1:length(tmp_postCAM)) {
  tmp_out[[i]] <- fromJSON(tmp_postCAM[i])
  tmp_out[[i]]$"21" <- NULL # causes error
}

dat_postCAM <- dplyr::bind_rows(tmp_out)
rm(tmp_postCAM)
rm(tmp_out)
### create counter variable for both data sets
# > pre study
dat_preCAM$ID <- NA

tmp_IDcounter <- 0
for (i in 1:nrow(dat_preCAM)) {
  if (!is.na(dat_preCAM$sender[i]) &&
      dat_preCAM$sender[i] == "Greetings") {
    # tmp <- dat_preCAM$prolific_pid[i]
    tmp_IDcounter = tmp_IDcounter + 1
  }
  dat_preCAM$ID[i] <- tmp_IDcounter
}

# > post CAM
dat_postCAM$ID <- NA

tmp_IDcounter <- 0
for (i in 1:nrow(dat_postCAM)) {
  if (!is.na(dat_postCAM$sender[i]) &&
      dat_postCAM$sender[i] == "CAMfeedbackGeneral") {
    # tmp <- dat_postCAM$prolific_pid[i]
    tmp_IDcounter = tmp_IDcounter + 1
  }
  dat_postCAM$ID[i] <- tmp_IDcounter
}
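The two counter loops can also be written as vectorized one-liners; an equivalent sketch, assuming (as above) that each participant's block of rows starts with exactly one "Greetings" (pre) or "CAMfeedbackGeneral" (post) sender row:
## equivalent vectorized sketch: the ID is the running count of marker rows seen so far
dat_preCAM$ID  <- cumsum(!is.na(dat_preCAM$sender)  & dat_preCAM$sender  == "Greetings")
dat_postCAM$ID <- cumsum(!is.na(dat_postCAM$sender) & dat_postCAM$sender == "CAMfeedbackGeneral")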
### keep only complete data sets
# > pre study
# sort(table(dat_preCAM$ID))
sum(table(dat_preCAM$ID) != max(table(dat_preCAM$ID)))
[1] 0
sum(table(dat_preCAM$ID) == max(table(dat_preCAM$ID)))
[1] 193
dat_preCAM <-
  dat_preCAM[dat_preCAM$ID %in% names(table(dat_preCAM$ID))[table(dat_preCAM$ID) == max(table(dat_preCAM$ID))], ]
# > post CAM
# sort(table(dat_postCAM$ID))
sum(table(dat_postCAM$ID) != max(table(dat_postCAM$ID)))
[1] 1
sum(table(dat_postCAM$ID) == max(table(dat_postCAM$ID)))
[1] 192
dat_postCAM <-
  dat_postCAM[dat_postCAM$ID %in% names(table(dat_postCAM$ID))[table(dat_postCAM$ID) == max(table(dat_postCAM$ID))], ]
### json (from JATOS) to 2D data.frame
# > pre study
# add paradata
tmp_notNumeric <-
  str_subset(string = colnames(dat_preCAM), pattern = "^meta|^R")
tmp_notNumeric <-
  str_subset(string = tmp_notNumeric,
             pattern = "labjs|location",
             negate = TRUE)

### get survey
vec_ques <- c("PROLIFIC_PID",
              "dummy_informedconsent",
              "commCheck",
              tmp_notNumeric)
vec_notNumeric = c("PROLIFIC_PID", tmp_notNumeric)

questionnaire_preCAM <- questionnairetype(
  dataset = dat_preCAM,
  listvars = vec_ques,
  notNumeric = vec_notNumeric,
  verbose = FALSE
)
dim(questionnaire_preCAM)
[1] 193 17
# > post CAM
tmp_numeric <-
  str_subset(string = colnames(dat_postCAM), pattern = "^nfc")

vec_ques <- c(
  "PROLIFIC_PID",
  "commCheck",
  "feedCAM_repres",
  "feedCAM_technicalprobs",
  "feedCAM_technicalprobsText",
  "feedCAM_already",
  "feedCAM_alreadyText",
  "openQuestion_MaterialSystem",
  "openQuestion_Ethic",
  "openQuestion_missedPositive",
  "openQuestion_missedNegative",
  "outcome_research",
  "outcome_prohibition",
  "outcome_buy",
  "outcome_buy_text",
  "education",
  "experienceMS",
  tmp_numeric,
  "feedback_critic"
)

vec_notNumeric = c(
  "PROLIFIC_PID",
  "feedCAM_technicalprobsText",
  "feedCAM_alreadyText",
  "openQuestion_MaterialSystem",
  "openQuestion_Ethic",
  "openQuestion_missedPositive",
  "openQuestion_missedNegative",
  "outcome_research",
  "outcome_prohibition",
  "outcome_buy",
  "outcome_buy_text",
  "education",
  "experienceMS",
  "feedback_critic"
)

questionnaire_postCAM <- questionnairetype(
  dataset = dat_postCAM,
  listvars = vec_ques,
  notNumeric = vec_notNumeric,
  verbose = FALSE
)

questionnaire_postCAM$feedback_critic[questionnaire_postCAM$feedback_critic == ""] <- NA
dim(questionnaire_postCAM)
[1] 192 23
questionnaire_postCAM$multipleEthic <- NA
tmp_multipleEthic <-
  cbind(dat_postCAM$ID, dat_postCAM$multipleEthic)

questionnaire_postCAM$multiplePositive <- NA
tmp_multiplePositive <-
  cbind(dat_postCAM$ID, dat_postCAM$multiplePositive)

questionnaire_postCAM$multipleNegative <- NA
tmp_multipleNegative <-
  cbind(dat_postCAM$ID, dat_postCAM$multipleNegative)

for (i in questionnaire_postCAM$ID) {
  # add multipleEthic
  tmp <- tmp_multipleEthic[tmp_multipleEthic[, 1] == i,]
  if (length(unlist(tmp[, 2])) == 3) {
    questionnaire_postCAM$multipleEthic[questionnaire_postCAM$ID == i] <-
      paste0(unlist(tmp[, 2]), collapse = " \\ ")
  }

  # add multiplePositive
  tmp <- tmp_multiplePositive[tmp_multiplePositive[, 1] == i,]
  if (length(unlist(tmp[, 2])) == 3) {
    questionnaire_postCAM$multiplePositive[questionnaire_postCAM$ID == i] <-
      paste0(unlist(tmp[, 2]), collapse = " \\ ")
  }

  # add multipleNegative
  tmp <- tmp_multipleNegative[tmp_multipleNegative[, 1] == i,]
  if (length(unlist(tmp[, 2])) == 3) {
    questionnaire_postCAM$multipleNegative[questionnaire_postCAM$ID == i] <-
      paste0(unlist(tmp[, 2]), collapse = " \\ ")
  }
}
dim(questionnaire_postCAM)
[1] 192 26
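The loop above collapses the three free-text answers per participant into a single string. For one of the columns, an equivalent tidyverse sketch (assuming multipleEthic is a list-column in dat_postCAM keyed by the same ID; tmp_sketch is a hypothetical name and is not used further):
## equivalent tidyverse sketch for the multipleEthic column
tmp_sketch <- dat_postCAM %>%
  dplyr::group_by(ID) %>%
  dplyr::summarise(multipleEthic = ifelse(length(unlist(multipleEthic)) == 3,
                                          paste0(unlist(multipleEthic), collapse = " \\ "),
                                          NA_character_))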
### merge all data sets
# remove one missing
questionnaire_preCAM <-
  questionnaire_preCAM[questionnaire_preCAM$PROLIFIC_PID %in% questionnaire_postCAM$PROLIFIC_PID,]

questionnaire_postCAM$ID <- NULL
questionnaire <-
  left_join(questionnaire_preCAM, questionnaire_postCAM, by = "PROLIFIC_PID")

## all missing answers to NA
questionnaire[questionnaire == ""] <- NA
## all feedback shorter than 3 characters to NA
questionnaire$feedback_critic[nchar(questionnaire$feedback_critic) <= 2] <- NA
dim(questionnaire)
[1] 192 41
### save files
## save as .xlsx file
xlsx::write.xlsx2(x = questionnaire, file = "questionnaire.xlsx")
## save as R object
saveRDS(questionnaire, file = "questionnaire.rds")
5 get reaction times for single components
Plot the time taken (in seconds) by participants for the single components of the study:
dat_duration <-
  data.frame(
    duration = NA,
    sender = NA,
    ID = NA,
    PROLIFIC_PID = NA
  )

for (i in 1:length(unique(dat_preCAM$ID))) {
  tmp_PID <-
    dat_preCAM$PROLIFIC_PID[dat_preCAM$ID == unique(dat_preCAM$ID)[i] &
                              !is.na(dat_preCAM$PROLIFIC_PID)]

  # pre CAM
  tmp_preCAM <-
    data.frame(duration = dat_preCAM$duration[dat_preCAM$ID == unique(dat_preCAM$ID)[i]] / 1000,
               sender = dat_preCAM$sender[dat_preCAM$ID == unique(dat_preCAM$ID)[i]])
  tmp_preCAM <- tmp_preCAM[!is.na(tmp_preCAM$sender), ]

  # post CAM
  tmp_postCAM <-
    data.frame(duration = dat_postCAM$duration[dat_postCAM$ID == unique(dat_postCAM$ID)[i]] / 1000,
               sender = dat_postCAM$sender[dat_postCAM$ID == unique(dat_postCAM$ID)[i]])
  tmp_postCAM <- tmp_postCAM[!is.na(tmp_postCAM$sender), ]

  tmp <- rbind(tmp_preCAM, tmp_postCAM)

  if (all(is.na(dat_duration))) {
    dat_duration <- data.frame(
      duration = tmp$duration,
      sender = tmp$sender,
      ID = rep(i, times = nrow(tmp)),
      PROLIFIC_PID = rep(tmp_PID, times = nrow(tmp))
    )
  } else {
    dat_duration <-
      rbind(
        dat_duration,
        data.frame(
          duration = tmp$duration,
          sender = tmp$sender,
          ID = rep(i, times = nrow(tmp)),
          PROLIFIC_PID = rep(tmp_PID, times = nrow(tmp))
        )
      )
  }
}
## remove empty sender
dat_duration <- dat_duration[!is.na(dat_duration$sender),]
dat_duration <- dat_duration[!is.na(dat_duration$duration),]

dat_duration$sender[dat_duration$sender == "done"] <-
  "CAM instructions"
## save as .xlsx
# write.xlsx2(x = dat_duration, file = "outputs/para_duration_singleComponents.xlsx")
#### plot
dat_duration$ID <- factor(dat_duration$ID)

p <- dat_duration %>%
  ggplot(aes(x = sender, y = duration, color = PROLIFIC_PID)) +
  geom_point() +
  geom_jitter(width = 0.15) +
  theme(axis.text.x = element_text(angle = 90)) +
  theme(legend.position = "none")
p
## save ggplot as PDF
ggsave(filename = "outputs/01_dataPreperation/durations_components.pdf", p)
# Calculate the mean duration in seconds for each sender and sort by mean duration
tmp <- dat_duration %>%
  group_by(sender) %>%
  summarise(N = n(),
            mean_duration = mean(duration, na.rm = TRUE)) %>%
  arrange(desc(mean_duration))

DT::datatable(tmp, options = list(pageLength = 5))
6 add socio-demographic data
### add prolific data
prolific <- prolific[prolific$Participant.id %in% questionnaire$PROLIFIC_PID,]
prolific <- prolific %>%
  arrange(sapply(Participant.id, function(y) which(y == questionnaire$PROLIFIC_PID)))

if(all(prolific$Participant.id == questionnaire$PROLIFIC_PID)){
  print("prolific data successfully added")

  questionnaire$socio_age <- prolific$Age
  questionnaire$socio_sex <- prolific$Sex
  questionnaire$socio_ethnicity <- prolific$Ethnicity.simplified
  questionnaire$socio_student <- prolific$Student.status
  questionnaire$socio_employment <- prolific$Employment.status
  questionnaire$total_min_prolific <- prolific$Time.taken / 60

  ## all time outs to NA
  # questionnaire$total_min_prolific[questionnaire$total_min_prolific > 1000] <- NA
  ## all expired data to NA
  questionnaire[questionnaire == "DATA_EXPIRED"] <- NA

  questionnaire$socio_age <- as.numeric(questionnaire$socio_age)
}
[1] "prolific data successfully added"
## all missing answers to NA
# questionnaire[questionnaire == ""] <- NA
## all feedback smaller than 3 characters to NA
# questionnaire$feedback_critic[nchar(questionnaire$feedback_critic) <= 2] <- NA
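The arrange(sapply(...)) call above reorders the Prolific export so its rows line up with the questionnaire; the same reordering can be written with match(). A sketch (not run here), assuming Participant.id values are unique and fully overlap with PROLIFIC_PID:
## equivalent reordering sketch using match()
# prolific <- prolific[match(questionnaire$PROLIFIC_PID, prolific$Participant.id), ]
# stopifnot(all(prolific$Participant.id == questionnaire$PROLIFIC_PID))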
6.1 Describe sample
table(questionnaire$socio_sex)
Female Male Prefer not to say
63 127 2
round(x = table(questionnaire$socio_sex) / nrow(questionnaire) * 100, digits = 2)
Female Male Prefer not to say
32.81 66.15 1.04
psych::describe(x = questionnaire[, c("socio_age", "total_min_prolific")])
vars n mean sd median trimmed mad min max range
socio_age 1 192 30.49 9.12 28.00 29.44 8.90 18.00 71.00 53.00
total_min_prolific 2 192 34.91 13.25 32.48 33.37 10.72 12.17 91.88 79.72
skew kurtosis se
socio_age 1.19 1.91 0.66
total_min_prolific 1.38 2.80 0.96
7 set up CAM data
Load CAM data
setwd("outputs/01_dataPreperation")
suppressMessages(read_file("CAMdata.txt") %>%
                   # ... split it into lines ...
                   str_split('\n') %>% first() %>%
                   discard(function(x) x == '') %>%
                   discard(function(x) x == '\r') %>%
                   # ... filter empty rows ...
                   discard(function(x) x == '')) -> dat_CAM_pre

raw_CAM <- list()
for(i in 1:length(dat_CAM_pre)){
  raw_CAM[[i]] <- jsonlite::fromJSON(txt = dat_CAM_pre[[i]])
  # raw_CAM[[i]]$connectors$isBidirectional <- TRUE
}
Create CAM files, draw CAMs and compute network indicators
### create CAM single files (nodes, connectors, merged)
CAMfiles <- create_CAMfiles(datCAM = raw_CAM, reDeleted = TRUE)
Nodes and connectors, which were deleted by participants were removed.
# deleted nodes: 0
# deleted connectors: 434
nrow(CAMfiles[[1]])
[1] 6369
nrow(CAMfiles[[2]])
[1] 7880
nrow(CAMfiles[[3]])
[1] 15760
# remove testing data sets
CAMfiles[[1]] <- CAMfiles[[1]][CAMfiles[[1]]$participantCAM %in% questionnaire$PROLIFIC_PID,]
CAMfiles[[2]] <- CAMfiles[[2]][CAMfiles[[2]]$participantCAM %in% questionnaire$PROLIFIC_PID,]
CAMfiles[[3]] <- CAMfiles[[3]][CAMfiles[[3]]$participantCAM.x %in% questionnaire$PROLIFIC_PID,]
### draw CAMs
CAMdrawn <- draw_CAM(dat_merged = CAMfiles[[3]],
                     dat_nodes = CAMfiles[[1]],
                     ids_CAMs = "all",
                     plot_CAM = FALSE,
                     useCoordinates = TRUE,
                     relvertexsize = 3,
                     reledgesize = 1)
processing 192 CAMs...
Warning: `graph.data.frame()` was deprecated in igraph 2.0.0.
ℹ Please use `graph_from_data_frame()` instead.
[1] "== participantCAM in drawnCAM"
i <- 2
edge.attributes(graph = CAMdrawn[[i]])
$width
[1] 5 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 5 1 1 1 1 5 1 1 1 1 1 1 1 1 1 1 1 1
[39] 1
$weight
[1] 3 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1
[39] 1
$color
[1] "grey" "grey" "grey" "grey" "grey" "grey" "grey" "grey" "grey" "grey"
[11] "grey" "grey" "grey" "grey" "grey" "grey" "grey" "grey" "grey" "grey"
[21] "grey" "grey" "grey" "grey" "grey" "grey" "grey" "grey" "grey" "grey"
[31] "grey" "grey" "grey" "grey" "grey" "grey" "grey" "grey" "grey"
$lty
[1] 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[39] 1
igraph::is_directed(graph = CAMdrawn[[i]])
[1] FALSE
plot(CAMdrawn[[i]], edge.arrow.size = .2,
layout=layout_nicely, vertex.frame.color="black", asp = .5, margin = -0.1,
vertex.size = 10, vertex.label.cex = .9)
### network indicators
tmp_microIndicator <- V(CAMdrawn[[1]])$label
networkIndicators <- compute_indicatorsCAM(drawn_CAM = CAMdrawn,
                                           micro_degree = tmp_microIndicator,
                                           micro_valence = tmp_microIndicator,
                                           micro_centr_clo = tmp_microIndicator,
                                           micro_transitivity = NULL, # not informative
                                           largestClique = FALSE)
Warning: `graph.density()` was deprecated in igraph 2.0.0.
ℹ Please use `edge_density()` instead.
Warning: The `types1` argument of `assortativity()` is deprecated as of igraph 1.6.0.
ℹ Please use the `values` argument instead.
Warning: `assortativity.degree()` was deprecated in igraph 2.0.0.
ℹ Please use `assortativity_degree()` instead.
### wordlist
CAMwordlist <- create_wordlist(
  dat_nodes = CAMfiles[[1]],
  dat_merged = CAMfiles[[3]],
  useSummarized = TRUE,
  order = "frequency",
  splitByValence = FALSE,
  comments = TRUE,
  raterSubsetWords = NULL,
  rater = FALSE
)
[1] "create_wordlist - use raw words"
[1] 0
[1] 6336
[1] "temporarily suffixes are added, because not all words have been summarized"
processing 192 CAMs...
[1] "== participantCAM in drawnCAM"
DT::datatable(CAMwordlist, options = list(pageLength = 5))
7.1 plot CAMs using igraph package
Plot only the first 10 CAMs to get an impression:
for(i in 1:10){ # length(CAMdrawn)
plot(CAMdrawn[[i]], edge.arrow.size = .7,
layout=layout_nicely, vertex.frame.color="black", asp = .5, margin = -0.1,
vertex.size = 10, vertex.label.cex = .9)
}
7.2 save CAMs as .json files, and as .png (igraph)
if(save_CAMs_as_pictures) {
  raw_CAM_translated <- raw_CAM

  setwd("outputs/01_dataPreperation")
  setwd("savedCAMs")
  setwd("png")

  ### remove all files if there are any
  if (length(list.files()) >= 1) {
    file.remove(list.files())
    cat('\n!
all former .png files have been deleted')
  }

  ### if no participant ID was provided replace by randomly generated CAM ID
  if (all(CAMfiles[[3]]$participantCAM.x == "noID")) {
    CAMfiles[[3]]$participantCAM.x <- CAMfiles[[3]]$CAM.x
  }

  ### save as .json files, and as .png (igraph)
  ids_CAMs <- unique(CAMfiles[[3]]$participantCAM.x)
  length(ids_CAMs)

  for (i in 1:length(ids_CAMs)) {
    save_graphic(filename = paste0("CAM", ids_CAMs[i])) # paste0(ids_CAMs[i]))
    CAM_igraph <- CAMdrawn[[c(1:length(CAMdrawn))[names(CAMdrawn) == paste0(unique(CAMfiles[[3]]$participantCAM.x)[i])]]]

    ### translate:
    V(CAM_igraph)$label[match(listBasalAttributes$German.Translation,
                              V(CAM_igraph)$label)] <- listBasalAttributes$Basal.Attribute
    V(CAM_igraph)$label[V(CAM_igraph)$label == "Akzeptanz eines neuen Materialsystems"] <-
      "Acceptance of a new material system"

    plot(
      CAM_igraph,
      edge.arrow.size = .7,
      layout = layout_nicely,
      vertex.frame.color = "black",
      asp = .5,
      margin = -0.1,
      vertex.size = 10,
      vertex.label.cex = .9
    )
    dev.off()
  }

  setwd("../json")

  ### remove all files if there are any
  if (length(list.files()) >= 1) {
    file.remove(list.files())
    cat('\n!
all former .json files have been deleted')
  }

  for (i in 1:length(raw_CAM_translated)) {
    if (!is_empty(raw_CAM_translated[[i]]$nodes)) {
      if (nrow(raw_CAM_translated[[i]]$nodes) > 5) {
        ### translate:
        raw_CAM_translated[[i]]$nodes$text[match(listBasalAttributes$German.Translation,
                                                 raw_CAM_translated[[i]]$nodes$text)] <-
          listBasalAttributes$Basal.Attribute
        raw_CAM_translated[[i]]$nodes$text[raw_CAM_translated[[i]]$nodes$text == "Akzeptanz eines neuen Materialsystems"] <-
          "Acceptance of a new material system"

        if (raw_CAM_translated[[i]]$creator %in% questionnaire$PROLIFIC_PID) {
          # only if not deleted previously
          write(
            toJSON(raw_CAM_translated[[i]], encoding = "UTF-8"),
            paste0(raw_CAM_translated[[i]]$creator, ".json")
          )
        }
      }
    }
  }
}
!
all former .png files have been deleted
!
all former .json files have been deleted
7.3 delete CAMs with star topology
### 10% of participants have connected all basal attributes to the central one
barplot(
  table(
    networkIndicators$degreetot_micro_AkzeptanzeinesneuenMaterialsystems
    / nrow(networkIndicators)
  )
)
### delete extreme CAMs without any inter-connections between basal attributes
vec_delete <- c()
h = 1

for (i in 1:nrow(networkIndicators)) {
  if (networkIndicators$degreetot_micro_AkzeptanzeinesneuenMaterialsystems[i] >= 30) {
    # & networkIndicators$transitivity_macro[i] == 0
    vec_delete[h] <- names(CAMdrawn)[i]
    h = h + 1

    if(plot_deleted_CAMs){
      plot(
        CAMdrawn[[i]],
        edge.arrow.size = .7,
        layout = layout_nicely,
        vertex.frame.color = "black",
        asp = .5,
        margin = -0.1,
        vertex.size = 10,
        vertex.label.cex = .9,
        main = networkIndicators$participantCAM[i]
      )
    }
  }
}
if (length(vec_delete) > 1) {
  cat(length(vec_delete), "CAMs are removed from the data set, which results in",
      round(x = (1-(length(CAMdrawn) - length(vec_delete)) / length(CAMdrawn)) * 100, digits = 2),
      "% loss of data")

  ## remove deleted CAMs from
  #> CAM files
  CAMfiles_clean <- CAMfiles
  CAMfiles_clean[[1]] <-
    CAMfiles_clean[[1]][!CAMfiles_clean[[1]]$participantCAM %in% vec_delete,]
  CAMfiles_clean[[2]] <-
    CAMfiles_clean[[2]][!CAMfiles_clean[[2]]$participantCAM %in% vec_delete,]
  CAMfiles_clean[[3]] <-
    CAMfiles_clean[[3]][!CAMfiles_clean[[3]]$participantCAM.x %in% vec_delete,]

  #> drawn CAMs
  CAMdrawn_clean <- CAMdrawn[!names(CAMdrawn) %in% vec_delete]

  #> network indicators
  networkIndicators_clean <-
    networkIndicators[!networkIndicators$participantCAM %in% vec_delete,]

  ### 10% of participants have connected all basal attributes to the central one
  barplot(
    table(
      networkIndicators_clean$degreetot_micro_AkzeptanzeinesneuenMaterialsystems
      / nrow(networkIndicators_clean)
    )
  )

  #> re-create wordlist
  CAMwordlist_clean <- create_wordlist(
    dat_nodes = CAMfiles_clean[[1]],
    dat_merged = CAMfiles_clean[[3]],
    useSummarized = TRUE,
    order = "frequency",
    splitByValence = FALSE,
    comments = TRUE,
    raterSubsetWords = NULL,
    rater = FALSE
  )

  #> questionnaire
  questionnaire_clean <-
    questionnaire[!questionnaire$PROLIFIC_PID %in% vec_delete,]
}
23 CAMs are removed from the data set, which results in 11.98 % loss of data
[1] "create_wordlist - use raw words"
[1] 0
[1] 5577
[1] "temporarily suffixes are added, because not all words have been summarized"
processing 169 CAMs...
[1] "== participantCAM in drawnCAM"
7.4 aggregate CAMs
This is needed for MultilayerExtraction; a sketch of the edge-list input it expects follows at the end of this subsection.
sel_ids <- unique(CAMfiles_clean[[1]]$participantCAM)
CAMaggregated_clean <-
  aggregate_CAMs(dat_merged = CAMfiles_clean[[3]],
                 dat_nodes = CAMfiles_clean[[1]],
                 ids_CAMs = sel_ids)
[1] "aggregate_CAMs: using participant CAM ids"
processing 169 CAMs...
[1] "== participantCAM in drawnCAM"
CAMaggregated_clean[[1]][1:5, 1:5]
Akzeptanz eines neuen Materialsystems
Akzeptanz eines neuen Materialsystems 169
Energie speichernd 27
wartungsintensiv 63
aktive Formänderung durch Umwelteinwirkung 44
energieeffizient 45
Energie speichernd
Akzeptanz eines neuen Materialsystems 27
Energie speichernd 169
wartungsintensiv 1
aktive Formänderung durch Umwelteinwirkung 1
energieeffizient 59
wartungsintensiv
Akzeptanz eines neuen Materialsystems 63
Energie speichernd 1
wartungsintensiv 169
aktive Formänderung durch Umwelteinwirkung 4
energieeffizient 4
aktive Formänderung durch Umwelteinwirkung
Akzeptanz eines neuen Materialsystems 44
Energie speichernd 1
wartungsintensiv 4
aktive Formänderung durch Umwelteinwirkung 169
energieeffizient 1
energieeffizient
Akzeptanz eines neuen Materialsystems 45
Energie speichernd 59
wartungsintensiv 4
aktive Formänderung durch Umwelteinwirkung 1
energieeffizient 169
## check for symmetry
all(colSums(CAMaggregated_clean[[5]][[1]]) == rowSums(CAMaggregated_clean[[5]][[1]]))
[1] TRUE
## test first concept of first CAM
colnames(CAMaggregated_clean[[5]][[1]])[1]
[1] "Akzeptanz eines neuen Materialsystems"
colnames(CAMaggregated_clean[[5]][[1]])[CAMaggregated_clean[[5]][[1]][1, ] >= 1]
[1] "nachhaltig" "intelligent" "lebensähnlich"
[4] "Energie generierend" "technologisch"
rownames(CAMaggregated_clean[[5]][[1]])[CAMaggregated_clean[[5]][[1]][1, ] >= 1]
[1] "nachhaltig" "intelligent" "lebensähnlich"
[4] "Energie generierend" "technologisch"
plot(
make_ego_graph(CAMdrawn_clean[[1]], order = 1, 1)[[1]],
edge.arrow.size = .7,
layout = layout_nicely,
vertex.frame.color = "black",
asp = .5,
margin = -0.1,
vertex.size = 10,
vertex.label.cex = .9,
main = networkIndicators$CAM_ID[i]
)
## adjacency matrix (A_ij = 1 when nodes i and j are connected and A_ij = 0 otherwise)
CAMaggregated_clean_onlyOnes <- CAMaggregated_clean[[5]]
for(i in 1:length(CAMaggregated_clean_onlyOnes)){
  CAMaggregated_clean_onlyOnes[[i]][CAMaggregated_clean_onlyOnes[[i]] >= 1] <- 1
}
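As noted at the start of this subsection, the per-CAM matrices are prepared for MultilayerExtraction. As far as the package README describes, its extraction routine works on a three-column multilayer edge list (node1, node2, layer) rather than on adjacency matrices, so a conversion step will be needed. A minimal sketch, assuming every element of CAMaggregated_clean_onlyOnes is one participant's symmetric 0/1 adjacency matrix with identical node order across layers (edgelist_multilayer is a hypothetical name):
## sketch: list of per-CAM 0/1 adjacency matrices -> (node1, node2, layer) edge list
edgelist_multilayer <- do.call(rbind, lapply(seq_along(CAMaggregated_clean_onlyOnes), function(l) {
  adj <- CAMaggregated_clean_onlyOnes[[l]]
  idx <- which(adj >= 1 & upper.tri(adj), arr.ind = TRUE) # undirected: keep each edge once, drop self-loops
  data.frame(node1 = idx[, 1], node2 = idx[, 2], layer = l)
}))
# the resulting edge list could then be passed to the package's extraction routine,
# e.g. multilayer.extraction(adjacency = edgelist_multilayer, ...); check the argument
# names against the installed package version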
8 save all final files
setwd("outputs/01_dataPreperation/final")
if (length(vec_delete) > 1) {
  if (all(questionnaire_clean$PROLIFIC_PID == networkIndicators_clean$participantCAM) &
      all(questionnaire_clean$PROLIFIC_PID == unique(CAMfiles_clean[[1]]$participantCAM))) {

    # save questionnaire
    ## save as .xlsx file
    xlsx::write.xlsx2(x = questionnaire_clean, file = "questionnaire.xlsx")
    ## save as R object
    saveRDS(questionnaire_clean, file = "questionnaire.rds")

    # save network indicators combined and clean
    ## save as .xlsx file
    xlsx::write.xlsx2(x = networkIndicators_clean, file = "networkIndicators.xlsx")
    ## save as R object
    saveRDS(networkIndicators_clean, file = "networkIndicators.rds")

    # save CAMfiles
    ## save as R object
    saveRDS(CAMfiles_clean, file = "CAMfiles.rds")

    # save drawn CAMs
    ## save as R object
    saveRDS(CAMdrawn_clean, file = "CAMdrawn.rds")

    # save wordlist
    ## save as .xlsx file
    xlsx::write.xlsx2(x = CAMwordlist_clean, file = "CAMwordlist.xlsx")

    # save aggregated CAMs
    ## save as R object
    saveRDS(CAMaggregated_clean, file = "CAMaggregated.rds")

    # save adjacency matrix of aggregated CAM as CSV
    write.table(CAMaggregated_clean[[1]], file = "CAMaggregated_adj_matrix.csv",
                row.names = FALSE, col.names = TRUE)

    # export list of single-layer adjacency matrices to MATLAB for use with GenLouvain
    multigraph_adj_matrices_list <- CAMaggregated_clean[[5]]
    # R.matlab only allows names shorter than 32 characters
    names(multigraph_adj_matrices_list) <- substr(names(multigraph_adj_matrices_list), start = 1, stop = 30)
    writeMat(con = "CAMaggregated_adj_matrices.mat", multigraph_adj_matrices_list = multigraph_adj_matrices_list)

    multigraph_adj_matrices_list <- CAMaggregated_clean_onlyOnes
    # R.matlab only allows names shorter than 32 characters
    names(multigraph_adj_matrices_list) <- substr(names(multigraph_adj_matrices_list), start = 1, stop = 30)
    writeMat(con = "CAMaggregated_adj_matrices_onlyOnes.mat", multigraph_adj_matrices_list = multigraph_adj_matrices_list)

    cat("clean data sets have been successfully saved")
  }
}
clean data sets have been successfully saved
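A quick round-trip check of the exported .mat files can be run directly from R; a minimal sketch, assuming the working directory is still outputs/01_dataPreperation/final (the exact nesting of the returned list depends on how writeMat stored the cell array):
## sketch: read one exported file back to confirm it is loadable
mat_check <- R.matlab::readMat("CAMaggregated_adj_matrices_onlyOnes.mat")
str(mat_check, max.level = 2)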