Main File for S2 main CAM study

Author

Julius Fenn, Louisa Estadieu

Notes

clean data

Superordinate concepts of raters are written slightly differently.

length(categories_CG$Category)

[1] 15

length(unique(dat$Superordinate))

[1] 37

table(dat$Superordinate)


                                  Anthropomorphism pos. 
                          1                           1 
     Anthropomorphism\nneg.      Anthropomorphism\npos. 
                          1                           5 
     Anthropomorphism, pos. Anthropomorphism, pos./neg. 
                          6                           1 
                  High cost                   High Cost 
                          9                           4 
                  HRI neg.                    HRI pos.  
                          1                           5 
                  HRI, neg.                   HRI, Neg. 
                         21                           3 
                  HRI, pos.                    Low Cost 
                         29                           2 
                   Mistrust                   Mistrust  
                         44                           8 
     Perceived \nUsefulness        Perceived usefulness 
                          2                          81 
       Perceived Usefulness       Perceived Usefulness  
                         41                           1 
      Perceived uselessness       Perceived Uselessness 
                         13                          10 
      Perceived\nUsefulness      Perceived\nUselessness 
                          3                           1 
                       Rest               Rest category 
                         17                          48 
              Rest Category                        Risk 
                         13                          23 
                     Safety          Social impact neg. 
                         22                          11 
         Social impact pos.         Social impact, neg. 
                          3                          19 
        Social impact, pos.    Social impact, pos./neg. 
                         10                           2 
      Societal impact neg.        Societal impact pos.  
                         18                           5 
                      Trust 
                         16

Remove whitespaces

dat$Superordinate <- str_trim(string = dat$Superordinate, side = "both")
length(unique(dat$Superordinate))

[1] 35

Loop over list coding guidelines using approximate matching

vec_unique <-  unique(dat$Superordinate)
for(i in 1:length(vec_unique)){
  tmp <- stringdist::stringdist(a = categories_CG$Category, b = vec_unique[i])
  
  cat("i: ", i, "word: ", vec_unique[i], "\n", categories_CG$Category[tmp <= 2], "\n\n")

  if(length(categories_CG$Category[tmp <= 2]) == 1){
    dat$Superordinate[dat$Superordinate == vec_unique[i]] <- categories_CG$Category[tmp <= 2]
  }
}

i:  1 word:  Rest category 
 Rest Category 

i:  2 word:  Perceived usefulness 
 Perceived Usefulness 

i:  3 word:  HRI, neg. 
 HRI, neg. 

i:  4 word:  Social impact, pos. 
 Social impact pos. 

i:  5 word:  Safety 
 Safety 

i:  6 word:  Social impact, neg. 
 Social impact neg. 

i:  7 word:  Risk 
 Risk 

i:  8 word:  Perceived uselessness 
 Perceived Uselessness 

i:  9 word:  Mistrust 
 Mistrust 

i:  10 word:  Anthropomorphism, pos. 
 Anthropomorphism pos. 

i:  11 word:  High cost 
 High Cost 

i:  12 word:  HRI, pos. 
 HRI, pos. 

i:  13 word:  Trust 
 Trust 

i:  14 word:   
  

i:  15 word:  Anthropomorphism, pos./neg. 
  

i:  16 word:  Social impact, pos./neg. 
  

i:  17 word:  Perceived Usefulness 
 Perceived Usefulness 

i:  18 word:  Societal impact pos. 
 Social impact pos. 

i:  19 word:  Anthropomorphism
pos. 
 Anthropomorphism pos. 

i:  20 word:  Rest 
 Risk 

i:  21 word:  Societal impact neg. 
 Social impact neg. 

i:  22 word:  Perceived Uselessness 
 Perceived Uselessness 

i:  23 word:  HRI pos. 
 HRI, pos. 

i:  24 word:  High Cost 
 High Cost 

i:  25 word:  Anthropomorphism
neg. 
 Anthropomorphism neg. 

i:  26 word:  HRI neg. 
 HRI, neg. 

i:  27 word:  Perceived
Usefulness 
 Perceived Usefulness 

i:  28 word:  Rest Category 
 Rest Category 

i:  29 word:  Social impact pos. 
 Social impact pos. 

i:  30 word:  Perceived
Uselessness 
 Perceived Uselessness 

i:  31 word:  HRI, Neg. 
 HRI, neg. 

i:  32 word:  Social impact neg. 
 Social impact neg. 

i:  33 word:  Low Cost 
 Low Cost 

i:  34 word:  Anthropomorphism pos. 
 Anthropomorphism pos. 

i:  35 word:  Perceived 
Usefulness 
 Perceived Usefulness

length(unique(dat$Superordinate))

[1] 18

Set 4 ratings to missing: * Social impact, pos./neg., 2 times * Anthropomorphism, pos./neg., once * empty, once

sum(dat$Superordinate == "Social impact, pos./neg.", na.rm = TRUE)

[1] 2

sum(dat$Superordinate == "Anthropomorphism, pos./neg.", na.rm = TRUE)

[1] 1

sum(dat$Superordinate == "", na.rm = TRUE)

[1] 1

dat$Superordinate[dat$Superordinate == "Social impact, pos./neg."] <- NA
dat$Superordinate[dat$Superordinate == "Anthropomorphism, pos./neg."] <- NA
dat$Superordinate[dat$Superordinate == ""] <- NA

length(categories_CG$Category)

[1] 15

length(unique(dat$Superordinate)) # plus missing

[1] 16

table(dat$Superordinate)


Anthropomorphism neg. Anthropomorphism pos.             High Cost 
                    1                    12                    13 
            HRI, neg.             HRI, pos.              Low Cost 
                   25                    34                     2 
             Mistrust  Perceived Usefulness Perceived Uselessness 
                   52                   128                    24 
        Rest Category                  Risk                Safety 
                   61                    40                    22 
   Social impact neg.    Social impact pos.                 Trust 
                   48                    18                    16

same for files list

for(i in 1:length(files_rater)) {
  # remove whitespaces
  files_rater[[i]]$Superordinate <-
    str_trim(string = files_rater[[i]]$Superordinate, side = "both")
  
  
  # Loop over list coding guidelines using approximate matching
  vec_unique <-  unique(files_rater[[i]]$Superordinate)
  for (j in 1:length(vec_unique)) {
    tmp <-
      stringdist::stringdist(a = categories_CG$Category, b = vec_unique[j])
    
    # cat("i: ", i, "word: ", vec_unique[i], "\n", categories_CG$Category[tmp <= 2], "\n\n")
    
    if (length(categories_CG$Category[tmp <= 2]) == 1) {
      files_rater[[i]]$Superordinate[files_rater[[i]]$Superordinate == vec_unique[j]] <-
        categories_CG$Category[tmp <= 2]
    }
  }
  
  
  # remove 3 ratings manually
  # files_rater[[i]]$Superordinate[files_rater[[i]]$Superordinate == "Social impact, pos./neg."] <-
  #   NA
  # files_rater[[i]]$Superordinate[files_rater[[i]]$Superordinate == "Anthropomorphism, pos./neg."] <-
  #   NA
  # files_rater[[i]]$Superordinate[files_rater[[i]]$Superordinate == ""] <-
  #   NA
}

compute reliability coefficients

### compute inter-rater coefficients


# addRatingColum(filesRater = files_rater)
cohensKappas <- computeCohensKappa(files = files_rater,
                                   numberRaters = length(files_rater))
cohensKappas

           [,1]       [,2]       [,3]       [,4]       [,5]
[1,] 1.00000000 0.02912621 0.00000000 0.00000000 0.00000000
[2,] 0.02912621 1.00000000 0.00000000 0.00990099 0.00990099
[3,] 0.00000000 0.00000000 1.00000000 0.01960784 0.00000000
[4,] 0.00000000 0.00990099 0.01960784 1.00000000 0.00990099
[5,] 0.00000000 0.00990099 0.00000000 0.00990099 1.00000000

cohensKappasMaximized <- computeCohensKappaMaximized(files = files_rater,
                                                     numberRaters = length(files_rater))
cohensKappasMaximized

          [,1]      [,2]      [,3]      [,4]      [,5]
[1,] 1.0000000 0.2829132 0.4475774 0.3873383 0.4923259
[2,] 0.2829132 1.0000000 0.2283605 0.2476284 0.2834395
[3,] 0.4002999 0.2283605 1.0000000 0.5204385 0.5094031
[4,] 0.3873383 0.2476284 0.5204385 1.0000000 0.5303551
[5,] 0.4923259 0.2834395 0.5094031 0.5303551 1.0000000

see raw data

setwd("data")
tmp_namesRater <- files <- list.files(pattern = "ratings")
setwd("..")
ovallRaterList <- getOverallRaterList(files = files_rater,
                                        orderAlphabetically = TRUE,
                                        raterNames = tmp_namesRater)

DT::datatable(ovallRaterList[,c("Words", str_subset(string = colnames(ovallRaterList), pattern = "Superordinate"))], options = list(pageLength = 5))

compute percantage of overlap

ovallRaterList$overlapNum <- NA
for(i in 1:nrow(ovallRaterList)){
ovallRaterList$overlapNum[i] <- max(table(unlist(ovallRaterList[i,c(str_subset(string = colnames(ovallRaterList), pattern = "Superordinate"))])))
}

## number raters
length(str_subset(string = colnames(ovallRaterList), pattern = "Superordinate"))

[1] 5

table(ovallRaterList$overlapNum)


 1  2  3  4  5 
 2 26 37 20 15

barplot(table(ovallRaterList$overlapNum) / nrow(ovallRaterList) * 100)

DT::datatable(ovallRaterList[ovallRaterList$overlapNum >= 3,c("Words", str_subset(string = colnames(ovallRaterList), pattern = "Superordinate"))], options = list(pageLength = 5))

DT::datatable(ovallRaterList[ovallRaterList$overlapNum <= 2,c("Words", str_subset(string = colnames(ovallRaterList), pattern = "Superordinate"))], options = list(pageLength = 5))