Supplement CAM-App

Author

Julius Fenn

Notes

Remark:

prepare data

add prolific IDs to raw data set

## load raw CAM data set
## the file is read as a single column (X1); every entry is expected to be a
## JSON string
rawCAMfile <- vroom::vroom(
  file = "data/CAMspiracy_data.txt",
  delim = "\t",
  show_col_types = FALSE,
  col_names = FALSE)$X1

## parse every entry; stop at the first entry rejected by testIfJson() so that
## no truncated data set is processed (and written out) further below
raw_CAM <- list()
for(i in seq_along(rawCAMfile)){
  if(!testIfJson(rawCAMfile[[i]])){
    stop("entry ", i, " of data/CAMspiracy_data.txt is not valid JSON",
         call. = FALSE)
  }
  raw_CAM[[i]] <- jsonlite::fromJSON(txt = rawCAMfile[[i]])
}

## load .xlsx file containing unique prolific IDs and CAM IDs
dat_ids <- xlsx::read.xlsx2(file = "data/questionnaireCAMs_t2.xlsx", sheetIndex = 1)
dat_ids <- dat_ids[, c("CAM_ID", "PROLIFIC_PID")]

## set the creator of every CAM to the Prolific ID listed for its CAM ID
## match() returns a single row index (the first hit), so `creator` stays one
## value even if a CAM ID were listed more than once; a CAM without an entry
## stops the script instead of leaving the data set partly relabelled
for(i in seq_along(raw_CAM)){
  tmp_row <- match(raw_CAM[[i]]$idCAM, dat_ids$CAM_ID)
  if(length(tmp_row) != 1 || is.na(tmp_row)){
    stop("CAM ", i, " (idCAM = ", raw_CAM[[i]]$idCAM,
         ") has no unique entry in dat_ids$CAM_ID", call. = FALSE)
  }
  raw_CAM[[i]]$creator <- dat_ids$PROLIFIC_PID[tmp_row]
}


## save raw CAM data with unique IDs
## NOTE: the working directory is switched to "data"; the copy step further
## below reads it back with getwd(), so it must not be reset in between
setwd("data")
## create / truncate the file; writeLines("") leaves one empty first line in it
writeLines("", "CAMspiracy_data_fixed.txt") # create file
text_connection <- file("CAMspiracy_data_fixed.txt", "a") # open connection to append

## write one JSON string per CAM; the connection is closed even if
## serialising one of the CAMs fails
tryCatch({
  for(i in seq_along(raw_CAM)){
    writeLines(jsonlite::toJSON(x = raw_CAM[[i]]), text_connection)
  }
}, finally = close(text_connection)) # close connection


### copy files (not overwritten)
## remember the current working directory; the source path is built from it
tmp_file_from <-  getwd()
## switch to the sibling folder "outputs", which is the copy target
setwd("../outputs")
## file.copy() is called without `overwrite`, so an already existing target
## file is kept and FALSE is returned
file.copy(from =  paste0(tmp_file_from, "/CAMspiracy_data_fixed.txt"), to = paste0(getwd(), "/CAMspiracy_data_fixed.txt"))
[1] FALSE
### remove files
## deletes the file from the folder stored in tmp_file_from
## NOTE(review): this runs regardless of the result of the preceding
## file.copy() call - confirm that dropping the file is intended when the
## copy returned FALSE
file.remove(paste0(tmp_file_from, "/CAMspiracy_data_fixed.txt"))
[1] TRUE

overwrite single words

Remark: manual adjustments…

setwd("outputs")

## empty replacement texts are set to NA; rows with NA are skipped by the
## overwrite loop below
dat_overwrite$text_overwrite[which(dat_overwrite$text_overwrite == "")] <- NA
## number of rows that carry a replacement text
length(dat_overwrite$text_overwrite) - sum(is.na(dat_overwrite$text_overwrite))
[1] 48
# dat_overwrite$text_overwrite[!is.na(dat_overwrite$text_overwrite)]
dat_overwrite$value <- as.numeric(dat_overwrite$value)

## overwrite single concepts with unique ID
## every row with a replacement text must match exactly one node in
## CAMfiles[[1]]; the replacement text gets a suffix derived from the value of
## that row (< 0 negative, 0 neutral, 10 ambivalent, otherwise positive).
## Any inconsistency stops the script, so that no partly overwritten nodes
## file is written.
for(i in seq_len(nrow(dat_overwrite))){
  if(!is.na(dat_overwrite$text_overwrite[i])){
    tmp <- dat_overwrite[i,]

    if(sum(CAMfiles[[1]]$id %in% tmp$id) != 1){
      stop("row ", i, " of dat_overwrite: id '", tmp$id,
           "' does not match exactly one node in CAMfiles[[1]]", call. = FALSE)
    }
    if(is.na(tmp$value)){
      stop("row ", i, " of dat_overwrite: value is missing or not numeric",
           call. = FALSE)
    }

    # add suffix
    if(tmp$value < 0){
      tmp$text_overwrite <- paste0(tmp$text_overwrite, "_negative")
    }else if(tmp$value == 0){
      tmp$text_overwrite <- paste0(tmp$text_overwrite, "_neutral")
    }else if(tmp$value == 10){
      tmp$text_overwrite <- paste0(tmp$text_overwrite, "_ambivalent")
    }else{
      tmp$text_overwrite <- paste0(tmp$text_overwrite, "_positive")
    }

    # print(tmp$text_overwrite)
    CAMfiles[[1]][CAMfiles[[1]]$id %in% tmp$id,"text_summarized"] <- tmp$text_overwrite
  }
}

## save overwritten nodes files
vroom::vroom_write(x =  CAMfiles[[1]], file = "CAM_nodes_clean.txt")

The fixed data set has been uploaded to the CAM-App.

split data set

The data set is split into participants in the low (class 1) and the high (class 3) conspiracy class.

## read the first sheet of the questionnaire file and keep only the ID, class,
## country and mean CMQ columns
dat_ids_consp <- xlsx::read.xlsx2(
  file = "data/questionnaire_final_t1.xlsx",
  sheetIndex = 1
)[c("PROLIFIC_PID", "classes_conspiracy", "country", "mean_CMQ")]

## frequencies of the conspiracy classes: 1 = low, 3 = high
table(dat_ids_consp[["classes_conspiracy"]])

  1   2   3 
224 189 185 
## check classes again
## mean_CMQ is converted with as.numeric() before it is plotted
dat_ids_consp$mean_CMQ <- as.numeric(dat_ids_consp$mean_CMQ)
## one box of mean_CMQ per conspiracy class
boxplot(dat_ids_consp$mean_CMQ ~ dat_ids_consp$classes_conspiracy)

# table(dat_ids_consp$classes_conspiracy, dat_ids_consp$country)

to low consp.

Subset the data to participants whose conspiracy class is low (1).

## all files of this subset are written to outputs/lowConspiracy
setwd("outputs")
if(!file.exists("lowConspiracy")) dir.create("lowConspiracy")
setwd("lowConspiracy")


## the subset is built on a copy of CAMfiles
CAMfiles_low <- CAMfiles

## check if the ID data set is complete
## (every participant of the nodes table must be listed in dat_ids_consp)
if(any(!(CAMfiles_low[[1]]$participantCAM %in% dat_ids_consp$PROLIFIC_PID))){
    print("Error")
}


## participants in conspiracy class 1 (low)
dat_ids_consp_low <- dat_ids_consp[dat_ids_consp$classes_conspiracy == "1", ]

## only persons with low conspiracy for CAMfiles_low
## (third element is filtered on its participantCAM.x column)
CAMfiles_low[[1]] <- CAMfiles_low[[1]][
  is.element(CAMfiles_low[[1]]$participantCAM, dat_ids_consp_low$PROLIFIC_PID),
]
CAMfiles_low[[2]] <- CAMfiles_low[[2]][
  is.element(CAMfiles_low[[2]]$participantCAM, dat_ids_consp_low$PROLIFIC_PID),
]
CAMfiles_low[[3]] <- CAMfiles_low[[3]][
  is.element(CAMfiles_low[[3]]$participantCAM.x, dat_ids_consp_low$PROLIFIC_PID),
]

## save files of subsets
vroom::vroom_write(x = CAMfiles_low[[1]], file = "CAM_nodes_low.txt")
vroom::vroom_write(x = CAMfiles_low[[2]], file = "CAM_connectors_low.txt")
vroom::vroom_write(x = CAMfiles_low[[3]], file = "CAM_merged_low.txt")

to high consp.

Subset the data to participants whose conspiracy class is high (3).

## all files of this subset are written to outputs/highConspiracy
setwd("outputs")
if(!file.exists("highConspiracy")) dir.create("highConspiracy")
setwd("highConspiracy")


## the subset is built on a copy of CAMfiles
CAMfiles_high <- CAMfiles

## check if the ID data set is complete (currently disabled)
# if(!all(CAMfiles_high[[1]]$participantCAM %in% dat_ids_consp$PROLIFIC_PID)){
#     print("Error")
# }


## participants in conspiracy class 3 (high)
dat_ids_consp_high <- dat_ids_consp[dat_ids_consp$classes_conspiracy == "3", ]

## only persons with high conspiracy for CAMfiles_high
## (third element is filtered on its participantCAM.x column)
CAMfiles_high[[1]] <- CAMfiles_high[[1]][
  is.element(CAMfiles_high[[1]]$participantCAM, dat_ids_consp_high$PROLIFIC_PID),
]
CAMfiles_high[[2]] <- CAMfiles_high[[2]][
  is.element(CAMfiles_high[[2]]$participantCAM, dat_ids_consp_high$PROLIFIC_PID),
]
CAMfiles_high[[3]] <- CAMfiles_high[[3]][
  is.element(CAMfiles_high[[3]]$participantCAM.x, dat_ids_consp_high$PROLIFIC_PID),
]

## save files of subsets
vroom::vroom_write(x = CAMfiles_high[[1]], file = "CAM_nodes_high.txt")
vroom::vroom_write(x = CAMfiles_high[[2]], file = "CAM_connectors_high.txt")
vroom::vroom_write(x = CAMfiles_high[[3]], file = "CAM_merged_high.txt")

aggregate CAMs high

## IDs of all participants in the high subset
sel_ids <- unique(CAMfiles_high[[1]]$participantCAM)

## nodes of the high subset; the valence suffix at the end of the summarized
## label is removed and surrounding whitespace is trimmed
tmp_nodes <- CAMfiles_high[[1]]
tmp_nodes$text_summarized <- str_trim(
  string = str_remove(
    string = tmp_nodes$text_summarized,
    pattern = "_positive$|_negative$|_neutral$|_ambivalent$"
  )
)

## labelled copy of the cleaned nodes (used again in the t tests)
tmp_nodes_high <- tmp_nodes
tmp_nodes_high$group <- "high"

CAMaggregated <- aggregate_CAMs(
  dat_merged = CAMfiles_high[[3]],
  dat_nodes = tmp_nodes,
  ids_CAMs = sel_ids
)
[1] "aggregate_CAMs: using participant CAM ids"
processing 62 CAMs... 
[1] "== participantCAM in drawnCAM"
[1] "text_summarized column identified"
# plot(CAMaggregated[[2]], vertex.size=diag(CAMaggregated[[1]]) / max(diag(CAMaggregated[[1]]))*20, edge.arrow.size=0.01)
# plot(CAMaggregated[[2]], vertex.size=(abs(V(CAMaggregated[[2]])$value)+1)*5, edge.arrow.size=0.01)


## g is the aggregated graph as returned, g2 its simplified copy
g <- CAMaggregated[[2]]
g2 <- simplify(g)
# plot(g2, edge.arrow.size=0.01,
#      vertex.size=diag(CAMaggregated[[1]]) / max(diag(CAMaggregated[[1]]))*20)

## weight of every edge of g2: number of shortest paths between its two end
## vertices in g, divided by two
E(g2)$weight <- sapply(E(g2), function(e) {
  end_points <- ends(g2, e)
  length(all_shortest_paths(g, from = end_points[1], to = end_points[2])$res)
}) / 2
# E(g2)$weight[E(g2)$weight == 1] <- NA

## vertices with a value between -.5 and .5 are coloured yellow ...
V(g2)$color[abs(V(g2)$value) <= .5] <- "yellow"

## ... and drawn as squares, all remaining vertices as circles
V(g2)$shape <- NA
V(g2)$shape <- ifelse(
  test = V(g2)$color == "yellow",
  yes = "square",
  no = "circle"
)



### > plot multiple times because of random layout
for(i in seq_len(5)){
  plot(
    g2,
    edge.arrow.size = 0,
    layout = layout_nicely,
    vertex.frame.color = "black",
    asp = .5,
    margin = -0.1,
    vertex.size = diag(CAMaggregated[[1]]) / max(diag(CAMaggregated[[1]])) * 5,
    vertex.label.cex = .9,
    edge.weight = 2,
    edge.width = (E(g2)$weight / 10)
  )
}

## ego network (neighbourhood of order 2) around "conspirative narration"
## The subgraph is taken from the simplified graph g2, so it carries its own
## edge weights, and the vertex sizes are looked up by vertex name. Passing
## the size / width vectors of the full graph to a subgraph would assign them
## to the wrong vertices and edges.
## assumes diag(CAMaggregated[[1]]) is ordered like V(g2), as the plots of the
## full graph above already do - TODO confirm
tmp <- make_ego_graph(g2, order = 2, nodes = "conspirative narration")
tmp_ego_size <- diag(CAMaggregated[[1]])[match(V(tmp[[1]])$name, V(g2)$name)]
plot(tmp[[1]], edge.arrow.size = 0,
     layout=layout_nicely, vertex.frame.color="black", asp = .5, margin = -0.1,
     vertex.size=tmp_ego_size / max(diag(CAMaggregated[[1]]))*5,
     vertex.label.cex = .9,
     edge.weight=2, edge.width=(E(tmp[[1]])$weight/10))

# tmp <- CAMaggregated[[1]][rownames(CAMaggregated[[1]]) == "conspirative narration", ]
# tmp[tmp != 0]

aggregate CAMs low

## IDs of all participants in the low subset
sel_ids <- unique(CAMfiles_low[[1]]$participantCAM)

## nodes of the low subset; the valence suffix at the end of the summarized
## label is removed and surrounding whitespace is trimmed
tmp_nodes <- CAMfiles_low[[1]]
tmp_nodes$text_summarized <- str_trim(
  string = str_remove(
    string = tmp_nodes$text_summarized,
    pattern = "_positive$|_negative$|_neutral$|_ambivalent$"
  )
)

## labelled copy of the cleaned nodes (used again in the t tests)
tmp_nodes_low <- tmp_nodes
tmp_nodes_low$group <- "low"

CAMaggregated <- aggregate_CAMs(
  dat_merged = CAMfiles_low[[3]],
  dat_nodes = tmp_nodes,
  ids_CAMs = sel_ids
)
[1] "aggregate_CAMs: using participant CAM ids"
processing 60 CAMs... 
[1] "== participantCAM in drawnCAM"
[1] "text_summarized column identified"
# plot(CAMaggregated[[2]], vertex.size=diag(CAMaggregated[[1]]) / max(diag(CAMaggregated[[1]]))*20, edge.arrow.size=0.01)
# plot(CAMaggregated[[2]], vertex.size=(abs(V(CAMaggregated[[2]])$value)+1)*5, edge.arrow.size=0.01)


## g is the aggregated graph as returned, g2 its simplified copy
g <- CAMaggregated[[2]]
g2 <- simplify(g)
# plot(g2, edge.arrow.size=0.01,
#      vertex.size=diag(CAMaggregated[[1]]) / max(diag(CAMaggregated[[1]]))*20)

## weight of every edge of g2: number of shortest paths between its two end
## vertices in g, divided by two
E(g2)$weight <- sapply(E(g2), function(e) {
  end_points <- ends(g2, e)
  length(all_shortest_paths(g, from = end_points[1], to = end_points[2])$res)
}) / 2
# E(g2)$weight[E(g2)$weight == 1] <- NA

## vertices with a value between -.5 and .5 are coloured yellow ...
V(g2)$color[abs(V(g2)$value) <= .5] <- "yellow"

## ... and drawn as squares, all remaining vertices as circles
V(g2)$shape <- NA
V(g2)$shape <- ifelse(
  test = V(g2)$color == "yellow",
  yes = "square",
  no = "circle"
)



### > plot multiple times because of random layout
## NOTE(review): edge widths are weight / 2 here - confirm the scaling is
## intended when comparing with other plots
for(i in seq_len(5)){
  plot(
    g2,
    edge.arrow.size = 0,
    layout = layout_nicely,
    vertex.frame.color = "black",
    asp = .5,
    margin = -0.1,
    vertex.size = diag(CAMaggregated[[1]]) / max(diag(CAMaggregated[[1]])) * 5,
    vertex.label.cex = .9,
    edge.weight = 2,
    edge.width = (E(g2)$weight / 2)
  )
}

t tests

## stack the node tables of both groups (the group column tells them apart)
nodes_lowHigh <- rbind(tmp_nodes_high, tmp_nodes_low)
## a value of 10 is recoded to 0 before any statistics are computed
nodes_lowHigh$value <- ifelse(
  test = nodes_lowHigh$value == 10,
  yes = 0,
  no = nodes_lowHigh$value
)


## restrict to the concept "climate change" and describe its values per group
tmp_nodes_lowHigh <- nodes_lowHigh[
  nodes_lowHigh$text_summarized == "climate change",
]
describeBy(x = tmp_nodes_lowHigh$value, group = tmp_nodes_lowHigh$group)

 Descriptive statistics by group 
group: high
   vars  n  mean   sd median trimmed mad min max range  skew kurtosis   se
X1    1 62 -0.44 1.33      0   -0.34   0  -3   3     6 -0.51     0.94 0.17
------------------------------------------------------------ 
group: low
   vars  n  mean   sd median trimmed  mad min max range  skew kurtosis   se
X1    1 60 -1.38 1.43     -1   -1.35 1.48  -3   0     3 -0.11    -1.92 0.18
## Levene's test for homogeneity of variance of the values between the groups
## (group is passed as a plain vector; leveneTest coerces it to a factor)
car::leveneTest(tmp_nodes_lowHigh$value, tmp_nodes_lowHigh$group)
Warning in leveneTest.default(tmp_nodes_lowHigh$value,
tmp_nodes_lowHigh$group): tmp_nodes_lowHigh$group coerced to factor.
Levene's Test for Homogeneity of Variance (center = median)
       Df F value    Pr(>F)    
group   1  16.844 7.435e-05 ***
      120                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## two-sided t test of value by group; var.equal = FALSE requests the Welch
## variant, which does not assume equal variances
t.test(tmp_nodes_lowHigh$value ~ tmp_nodes_lowHigh$group, var.equal = FALSE, alternative = "two.sided")

    Welch Two Sample t-test

data:  tmp_nodes_lowHigh$value by tmp_nodes_lowHigh$group
t = 3.7973, df = 118.66, p-value = 0.0002321
alternative hypothesis: true difference in means between group high and group low is not equal to 0
95 percent confidence interval:
 0.4535787 1.4421202
sample estimates:
mean in group high  mean in group low 
        -0.4354839         -1.3833333 
## frequency of every value across both groups
barplot(table(tmp_nodes_lowHigh$value))

## cross table: value (rows) by group (columns)
table(tmp_nodes_lowHigh$value, tmp_nodes_lowHigh$group)
    
     high low
  -3   10  23
  -2    1   7
  -1    3   0
  0    45  30
  2     1   0
  3     2   0
# tmp_nodes_lowHigh <- nodes_lowHigh[nodes_lowHigh$text_summarized == "political measures against climate change",]
# describeBy(tmp_nodes_lowHigh$value, tmp_nodes_lowHigh$group)
# t.test(tmp_nodes_lowHigh$value ~ tmp_nodes_lowHigh$group, var.equal = TRUE, alternative = "two.sided")