Main File for part II of basal attributes article

Author

Julius Fenn

Notes

prepare data

set up data.frame questionnaires

setwd("outputs")
# > pre study
suppressMessages(read_file('preCAM.txt') %>%
                   # ... split it into lines ...
                   str_split('\n') %>% first() %>%
                   # ... filter empty rows ...
                   discard(function(x) x == '') %>%
                   discard(function(x) x == '\r') %>%
                   # ... parse JSON into a data.frame
                   map_dfr(fromJSON, flatten=TRUE)) -> dat_preCAM
# > post first CAM
suppressMessages(read_file('postCAM.txt') %>%
                   # ... split it into lines ...
                   str_split('\n') %>% first() %>%
                   # ... filter empty rows ...
                   discard(function(x) x == '') %>%
                   discard(function(x) x == '\r') %>%
                   # ... parse JSON into a data.frame
                   map_dfr(fromJSON, flatten=TRUE)) -> dat_postCAM


########################################
# create counter variable for both data sets
########################################
### pre study
dat_preCAM$ID <- NA

tmp_IDcounter <- 0
for(i in 1:nrow(dat_preCAM)){
  if(!is.na(dat_preCAM$sender[i]) && dat_preCAM$sender[i] == "Greetings"){
    # tmp <- dat_preCAM$prolific_pid[i]
    tmp_IDcounter = tmp_IDcounter + 1
  }
  dat_preCAM$ID[i] <- tmp_IDcounter
}



### post study
dat_postCAM$ID <- NA

tmp_IDcounter <- 0
for(i in 1:nrow(dat_postCAM)){
  if(!is.na(dat_postCAM$sender[i]) && dat_postCAM$sender[i] == "CAMfeedbackGeneral"){
    # tmp <- dat_postCAM$prolific_pid[i]
    tmp_IDcounter = tmp_IDcounter + 1
  }
  dat_postCAM$ID[i] <- tmp_IDcounter
}



########################################
# keep only complete data sets
########################################
### pre-study
# sort(table(dat_preCAM$ID))
sum(table(dat_preCAM$ID) != max(table(dat_preCAM$ID)))

[1] 0

sum(table(dat_preCAM$ID) == max(table(dat_preCAM$ID)))

[1] 10

dat_preCAM <- dat_preCAM[dat_preCAM$ID %in% names(table(dat_preCAM$ID))[table(dat_preCAM$ID) == max(table(dat_preCAM$ID))],]

### post-study
# sort(table(dat_postCAM$ID))
sum(table(dat_postCAM$ID) != max(table(dat_postCAM$ID)))

[1] 0

sum(table(dat_postCAM$ID) == max(table(dat_postCAM$ID)))

[1] 10

dat_postCAM <- dat_postCAM[dat_postCAM$ID %in% names(table(dat_postCAM$ID))[table(dat_postCAM$ID) == max(table(dat_postCAM$ID))],]



########################################
# json (from JATOS) to 2D data.frame
########################################
################################ pre-study
### add paradata
tmp_notNumeric <- str_subset(string = colnames(dat_preCAM), pattern = "^meta|^R")
tmp_notNumeric <- str_subset(string = tmp_notNumeric, pattern = "labjs|location", negate = TRUE)

### get survey
vec_ques <- c("PROLIFIC_PID",
              "dummy_informedconsent", 
              "commCheck",
              tmp_notNumeric)

vec_notNumeric = c("PROLIFIC_PID", tmp_notNumeric)

questionnaire_preCAM <- questionnairetype(dataset = dat_preCAM, 
                                        listvars = vec_ques, 
                                        notNumeric = vec_notNumeric, verbose = FALSE)


dim(questionnaire_preCAM)

[1] 10 17

################################ post-study
vec_ques <- c("PROLIFIC_PID", "commCheck",
              "feedCAM_repres", "feedCAM_technicalprobsText", "feedCAM_alreadyText",
              "openQuestion_MaterialSystem",
              "openQuestion_Ethic",
              "openQuestion_missedPositive", "openQuestion_missedNegative",
              "outcome_research", "outcome_prohibition", "outcome_buy", "outcome_buy_text", 
              "education", "experienceMS",
              "feedback_critic")

vec_notNumeric = c("PROLIFIC_PID", 
                   "feedCAM_technicalprobsText", "feedCAM_alreadyText", 
                                 "openQuestion_MaterialSystem",
                                 "openQuestion_Ethic",
              "openQuestion_missedPositive", "openQuestion_missedNegative",
   "outcome_research", "outcome_prohibition", "outcome_buy", "outcome_buy_text", 
                   "education", "experienceMS",
                   "feedback_critic")

questionnaire_postCAM <- questionnairetype(dataset = dat_postCAM,
                                        listvars = vec_ques,
                                        notNumeric = vec_notNumeric, verbose = FALSE)

questionnaire_postCAM$feedback_critic[questionnaire_postCAM$feedback_critic == ""] <- NA
dim(questionnaire_postCAM)

[1] 10 16

questionnaire_postCAM$multipleEthic <- NA
tmp_multipleEthic <- cbind(dat_postCAM$ID, dat_postCAM$multipleEthic)

questionnaire_postCAM$multiplePositive <- NA
tmp_multiplePositive <- cbind(dat_postCAM$ID, dat_postCAM$multiplePositive)

questionnaire_postCAM$multipleNegative <- NA
tmp_multipleNegative <- cbind(dat_postCAM$ID, dat_postCAM$multipleNegative)


for(i in questionnaire_postCAM$ID){
  # add multipleEthic
  tmp <- tmp_multipleEthic[tmp_multipleEthic[,1] == i, ]
  if(length(unlist(tmp[,2])) == 3){
    questionnaire_postCAM$multipleEthic[questionnaire_postCAM$ID == i] <- paste0(unlist(tmp[,2]), collapse = " \\ ")
  }
  
  # add multiplePositive
    tmp <- tmp_multiplePositive[tmp_multiplePositive[,1] == i, ]
  if(length(unlist(tmp[,2])) == 3){
    questionnaire_postCAM$multiplePositive[questionnaire_postCAM$ID == i] <- paste0(unlist(tmp[,2]), collapse = " \\ ")
  }
    
      # add multipleNegative
    tmp <- tmp_multipleNegative[tmp_multipleNegative[,1] == i, ]
  if(length(unlist(tmp[,2])) == 3){
    questionnaire_postCAM$multipleNegative[questionnaire_postCAM$ID == i] <- paste0(unlist(tmp[,2]), collapse = " \\ ")
  }
}


dim(questionnaire_postCAM)

[1] 10 19

################################ merge two data sets
questionnaire_postCAM$ID <- NULL
questionnaire <- left_join(questionnaire_preCAM, questionnaire_postCAM, by="PROLIFIC_PID")
dim(questionnaire)

[1] 10 34

set up CAM data

pre

Load CAM data

setwd("outputs")
suppressMessages(read_file("CAMdata.txt") %>%
  # ... split it into lines ...
  str_split('\n') %>% first() %>%
    discard(function(x) x == '') %>%
    discard(function(x) x == '\r') %>%
  # ... filter empty rows ...
  discard(function(x) x == '')) -> dat_CAM_pre

raw_CAM <- list()
for(i in 1:length(dat_CAM_pre)){
  raw_CAM[[i]] <- jsonlite::fromJSON(txt = dat_CAM_pre[[i]])
}

Create CAM files, draw CAMs and compute network indicators

########################################
# create CAM single files (nodes, connectors, merged)
########################################
CAMfiles <- create_CAMfiles(datCAM = raw_CAM, reDeleted = TRUE)

Nodes and connectors, which were deleted by participants were removed. 
 # deleted nodes:  0 
 # deleted connectors:  15

## remove white spaces within pre-defined concepts
CAMfiles[[1]]$text <- str_replace_all(string = CAMfiles[[1]]$text, "\\s+", " ")

########################################
# draw CAMs
########################################
CAMdrawn <- draw_CAM(dat_merged = CAMfiles[[3]],
                     dat_nodes = CAMfiles[[1]],ids_CAMs = "all",
                     plot_CAM = FALSE,
                     useCoordinates = TRUE,
                     relvertexsize = 3,
                     reledgesize = 1)

processing 10 CAMs... 
[1] "== participantCAM in drawnCAM"

########################################
# draw CAMs
########################################
tmp_microIndicator <- V(CAMdrawn[[1]])$label # all concepts are pre-defined
networkIndicators <- compute_indicatorsCAM(drawn_CAM = CAMdrawn, 
                                           micro_degree = tmp_microIndicator, 
                                           micro_valence = tmp_microIndicator, 
                                           micro_centr_clo = tmp_microIndicator, 
                                           micro_transitivity = tmp_microIndicator, 
                                           largestClique = FALSE)


########################################
# wordlists
########################################
CAMwordlist <- create_wordlist(
  dat_nodes =  CAMfiles[[1]],
  dat_merged =  CAMfiles[[3]],
  order = "alphabetic",
  splitByValence = FALSE,
  comments = TRUE,
  raterSubsetWords = NULL,
  rater = FALSE
)

processing 10 CAMs... 
[1] "== participantCAM in drawnCAM"

DT::datatable(CAMwordlist, options = list(pageLength = 5))

plot CAMs using igraph package

Just plot first 10 CAMs for an impression

for(i in 1:10){ # length(CAMdrawn)
  plot(CAMdrawn[[i]], edge.arrow.size = .7,
       layout=layout_nicely, vertex.frame.color="black", asp = .5, margin = -0.1,
       vertex.size = 10, vertex.label.cex = .9)
}

plot aggregated CAM

sel_ids <- unique(CAMfiles[[1]]$participantCAM)
CAMaggregated <- aggregate_CAMs(dat_merged = CAMfiles[[3]], dat_nodes = CAMfiles[[1]],
                                ids_CAMs = sel_ids)

[1] "aggregate_CAMs: using participant CAM ids"
processing 10 CAMs... 
[1] "== participantCAM in drawnCAM"

# plot(CAMaggregated[[2]], vertex.size=diag(CAMaggregated[[1]]) / max(diag(CAMaggregated[[1]]))*20, edge.arrow.size=0.01)
# plot(CAMaggregated[[2]], vertex.size=(abs(V(CAMaggregated[[2]])$value)+1)*5, edge.arrow.size=0.01)


g = CAMaggregated[[2]]
g2 = simplify(CAMaggregated[[2]])
# plot(g2, edge.arrow.size=0.01,
#      vertex.size=diag(CAMaggregated[[1]]) / max(diag(CAMaggregated[[1]]))*20)

E(g2)$weight = sapply(E(g2), function(e) {
  length(all_shortest_paths(g, from=ends(g2, e)[1], to=ends(g2, e)[2])$res) } )
E(g2)$weight = E(g2)$weight * 2
# E(g2)$weight[E(g2)$weight == 1] <- NA

V(g2)$color[V(g2)$value <= .5 & V(g2)$value >= -.5] <- "yellow"

V(g2)$shape <- NA
V(g2)$shape <- ifelse(test = V(g2)$color == "yellow", yes = "square", no = "circle")



### > plot multiple times because of random layout
for(i in 1:3){
  plot(g2, edge.arrow.size = 0,
     layout=layout_nicely, vertex.frame.color="black", asp = .5, margin = -0.1,
     vertex.size=diag(CAMaggregated[[1]]) / max(diag(CAMaggregated[[1]]))*5,
     vertex.label.cex = .9, 
     edge.weight=2, edge.width=(E(g2)$weight/3))
}

save CAMs as .json files, and as .png (igraph)

save_CAMs_as_pictures = FALSE

if(save_CAMs_as_pictures){
  setwd("outputs")

setwd("savedCAMs")
setwd("png")
### remove all files if there are any
if(length(list.files()) >= 1){
  file.remove(list.files())
  cat('\n!
      all former .png files have been deleted')
}

### if no participant ID was provided replace by randomly generated CAM ID

if(all(CAMfiles[[3]]$participantCAM.x == "noID")){
  CAMfiles[[3]]$participantCAM.x <- CAMfiles[[3]]$CAM.x
}

### save as .json files, and as .png (igraph)
ids_CAMs <- unique(CAMfiles[[3]]$participantCAM.x); length(ids_CAMs)


for(i in 1:length(ids_CAMs)){
  save_graphic(filename = paste0(ids_CAMs[i]))
  CAM_igraph <- CAMdrawn[[c(1:length(CAMdrawn))[
    names(CAMdrawn) == paste0(unique(CAMfiles[[3]]$participantCAM.x)[i])]]]
  plot(CAM_igraph, edge.arrow.size = .7,
       layout=layout_nicely, vertex.frame.color="black", asp = .5, margin = -0.1,
       vertex.size = 10, vertex.label.cex = .9)
  dev.off()
}

setwd("../json")
### remove all files if there are any
if(length(list.files()) >= 1){
  file.remove(list.files())
  cat('\n!
      all former .json files have been deleted')
}
for(i in 1:length(raw_CAM)){
  if(!is_empty(raw_CAM[[i]]$nodes)){
    if(nrow(raw_CAM[[i]]$nodes) > 5){
      write(toJSON(raw_CAM[[i]], encoding = "UTF-8"),
            paste0(raw_CAM[[i]]$idCAM, ".json"))
    }
  }
}
}

analyze data

describe data set

feedback to the study

Question:

DT::datatable(questionnaire[,c("PROLIFIC_PID", "feedback_critic")], options = list(pageLength = 5))

Material system that has been thought of

Question: Bitte beschreiben Sie an an welches Materialsystem oder welche Materialsysteme Sie dabei gedacht haben.

DT::datatable(questionnaire[,c("PROLIFIC_PID", "openQuestion_MaterialSystem")], options = list(pageLength = 5))

basal attributes: most pos. / neg.

Question: Welche drei Begriffen nehmen Sie am positivsten bzw. negativsten für die Beschreibung neuer Materialsysteme wahr?

## most positive
tmp <- str_trim(unlist(str_split(string = questionnaire$multiplePositive, pattern = "\\\\")))
sort(table(tmp))

tmp
        bioinspiriert Energie   generierend        energieautonom 
                    1                     1                     1 
          intelligent                robust            nachhaltig 
                    1                     1                     2 
    selbstreparierend               haltbar             langlebig 
                    2                     3                     3 
           ökologisch      energieeffizient      umweltfreundlich 
                    4                     5                     6

## most negative
tmp <- str_trim(unlist(str_split(string = questionnaire$multipleNegative, pattern = "\\\\")))
sort(table(tmp))

tmp
                          Insekten ähnlich 
                                         1 
aktive Formänderung durch Umwelteinwirkung 
                                         2 
                        enthält Kunststoff 
                                         4 
                         leicht zerstörbar 
                                         7 
                          wartungsintensiv 
                                         7 
                           umweltschädlich 
                                         9

## answers of participants
DT::datatable(questionnaire[,c("PROLIFIC_PID", "multiplePositive", "multipleNegative")], options = list(pageLength = 5))

any basal attributes missing?

Question: Fallen Ihnen weitere Eigenschaften ein, die Sie als relevant oder negativ für die Beschreibung neuer Materialsysteme erachten, die in der Liste nicht aufgeführt sind, so können Sie diese gerne in folgenden Textfeldern ergänzen:

## answers of participants
DT::datatable(questionnaire[,c("PROLIFIC_PID", "openQuestion_missedPositive", "openQuestion_missedNegative")], options = list(pageLength = 5))

basal attributes: ethical most relevant

Question: Welche drei Begriffe sind aus Ihrer Sicht in moralischer Hinsicht am “relevantesten”?

## ethical relevant
tmp <- str_trim(unlist(str_split(string = questionnaire$multipleEthic, pattern = "\\\\")))
sort(table(tmp))

tmp
Energie   generierend         selbstheilend      widerstandsfähig 
                    1                     1                     1 
              haltbar             langlebig       umweltschädlich 
                    2                     2                     2 
     energieeffizient            nachhaltig            ökologisch 
                    3                     3                     6 
     umweltfreundlich 
                    9

## answers of participants
DT::datatable(questionnaire[,c("PROLIFIC_PID", "multipleEthic")], options = list(pageLength = 5))

argument for choosen basal attributes

Question: Bitte begründen Sie kurz die Auswahl der ethisch relevanten Begriffe:

## answers of participants
DT::datatable(questionnaire[,c("PROLIFIC_PID", "openQuestion_Ethic")], options = list(pageLength = 5))

outcome questions

Question: Sollte die Entwicklung innovativer Materialsysteme mit öffentlichen Mitteln gefördert werden?

table(questionnaire$outcome_research)


unsure    yes 
     1      9

Question: Sollten die Erforschung und Entwicklung solcher innovativer Materialsysteme verboten werden?

table(questionnaire$outcome_prohibition)


no 
10

Question: Wären Sie bereit, Produkte zu kaufen, die innovative Materialsysteme enthalten?

table(questionnaire$outcome_buy)


unsure    yes 
     2      8

if yes to previous question

Question: An welche möglichen Produkte haben Sie gedacht?

## answers of participants
DT::datatable(questionnaire[,c("PROLIFIC_PID", "outcome_buy_text")], options = list(pageLength = 5))

co-variation of basal attributes

Compare the probability that two concepts are connected in randomly generated networks to drawn CAMs:

## get average number of drawn concepts (here fixed)
numConcepts <- mean(networkIndicators$num_nodes_macro)
## get average density
numDensity <- mean(networkIndicators$density_macro)

## simply get the average probability that two concepts are connected:
g <- igraph::random.graph.game(n = numConcepts, p.or.m = numDensity)
plot(g)

are.connected(g, 1, 2)

[1] FALSE

#> whereby each edge to be drawn has the identical probability in the Erdős–Rényi model
vec_booleanConnected <- c()
# vec_booleanConnected2 <- c()

for(i in 1:10000){
  g <- igraph::random.graph.game(n = numConcepts, p.or.m = numDensity)
  vec_booleanConnected[i] <- are.connected(g, 1, 2)
    # vec_booleanConnected2[i] <- are.connected(g, 22, 26)
}
baselineProbability <- mean(vec_booleanConnected)
baselineProbability

[1] 0.0325

# mean(vec_booleanConnected2)

This baseline probability can be compared to all possible combinations of drawn concepts:

vec_boolean <- c()
for(i in 1:length(CAMdrawn)){
  vec_boolean[i] <- are.connected(graph = CAMdrawn[[i]], 
              v1 = V(CAMdrawn[[i]])[V(CAMdrawn[[i]])$label == "robust"], 
              v2 = V(CAMdrawn[[i]])[V(CAMdrawn[[i]])$label == "widerstandsfähig"])
}

mean(vec_boolean)

[1] 0.1