clean data
Superordinate concepts of raters are written slightly differently.
# Number of official categories in the coding guidelines ...
length(categories_CG[["Category"]])
# ... versus the number of distinct labels the raters actually wrote down.
length(unique(dat[["Superordinate"]]))
Anthropomorphism pos.
1 1
Anthropomorphism\nneg. Anthropomorphism\npos.
1 5
Anthropomorphism, pos. Anthropomorphism, pos./neg.
6 1
High cost High Cost
9 4
HRI neg. HRI pos.
1 5
HRI, neg. HRI, Neg.
21 3
HRI, pos. Low Cost
29 2
Mistrust Mistrust
44 8
Perceived \nUsefulness Perceived usefulness
2 81
Perceived Usefulness Perceived Usefulness
41 1
Perceived uselessness Perceived Uselessness
13 10
Perceived\nUsefulness Perceived\nUselessness
3 1
Rest Rest category
17 48
Rest Category Risk
13 23
Safety Social impact neg.
22 11
Social impact pos. Social impact, neg.
3 19
Social impact, pos. Social impact, pos./neg.
10 2
Societal impact neg. Societal impact pos.
18 5
Trust
16
Remove leading and trailing whitespace
# Strip whitespace at both ends of every rater label, then recount the
# distinct labels to see how many spelling variants remain.
dat[["Superordinate"]] <- str_trim(dat[["Superordinate"]], side = "both")
length(unique(dat[["Superordinate"]]))
Loop over the raters' labels and map each one to a category of the coding guidelines using approximate string matching
# Harmonise the raters' labels with the official category names.
#
# Every distinct label is compared with all categories of the coding
# guidelines; when exactly one category lies within an edit distance of 2,
# all occurrences of the label are replaced by that category.
vec_unique <- unique(dat$Superordinate)

# Abbreviated labels that the distance rule would resolve to the WRONG
# category: "Rest" is 2 edits away from "Risk" but 9 edits away from
# "Rest Category", so without this entry every "Rest" rating is silently
# recoded as "Risk". Only targets that exist in the guidelines are kept.
manual_recodes <- c("Rest" = "Rest Category")
manual_recodes <- manual_recodes[manual_recodes %in% categories_CG$Category]

for (i in seq_along(vec_unique)) {
  label <- vec_unique[i]
  # Missing labels cannot be matched; comparing against NA would only
  # produce NA distances.
  if (is.na(label)) {
    next
  }

  if (label %in% names(manual_recodes)) {
    candidates <- manual_recodes[[label]]
  } else {
    tmp <- stringdist::stringdist(a = categories_CG$Category, b = label)
    candidates <- categories_CG$Category[tmp <= 2]
  }

  # Trace: the label and the category (or categories) it was matched to.
  cat("i: ", i, "word: ", label, "\n", candidates, "\n\n")

  # Recode only unambiguous matches; labels with zero or several candidate
  # categories are left untouched for manual inspection.
  if (length(candidates) == 1) {
    dat$Superordinate[dat$Superordinate == label] <- candidates
  }
}
i: 1 word: Rest category
Rest Category
i: 2 word: Perceived usefulness
Perceived Usefulness
i: 3 word: HRI, neg.
HRI, neg.
i: 4 word: Social impact, pos.
Social impact pos.
i: 5 word: Safety
Safety
i: 6 word: Social impact, neg.
Social impact neg.
i: 7 word: Risk
Risk
i: 8 word: Perceived uselessness
Perceived Uselessness
i: 9 word: Mistrust
Mistrust
i: 10 word: Anthropomorphism, pos.
Anthropomorphism pos.
i: 11 word: High cost
High Cost
i: 12 word: HRI, pos.
HRI, pos.
i: 13 word: Trust
Trust
i: 14 word:
i: 15 word: Anthropomorphism, pos./neg.
i: 16 word: Social impact, pos./neg.
i: 17 word: Perceived Usefulness
Perceived Usefulness
i: 18 word: Societal impact pos.
Social impact pos.
i: 19 word: Anthropomorphism
pos.
Anthropomorphism pos.
i: 20 word: Rest
Risk
i: 21 word: Societal impact neg.
Social impact neg.
i: 22 word: Perceived Uselessness
Perceived Uselessness
i: 23 word: HRI pos.
HRI, pos.
i: 24 word: High Cost
High Cost
i: 25 word: Anthropomorphism
neg.
Anthropomorphism neg.
i: 26 word: HRI neg.
HRI, neg.
i: 27 word: Perceived
Usefulness
Perceived Usefulness
i: 28 word: Rest Category
Rest Category
i: 29 word: Social impact pos.
Social impact pos.
i: 30 word: Perceived
Uselessness
Perceived Uselessness
i: 31 word: HRI, Neg.
HRI, neg.
i: 32 word: Social impact neg.
Social impact neg.
i: 33 word: Low Cost
Low Cost
i: 34 word: Anthropomorphism pos.
Anthropomorphism pos.
i: 35 word: Perceived
Usefulness
Perceived Usefulness
length(unique(dat$Superordinate))
Set 4 ratings to missing: * Social impact, pos./neg., 2 times * Anthropomorphism, pos./neg., once * empty, once
# How often does each label occur that cannot be assigned to a single category?
sum(dat$Superordinate %in% "Social impact, pos./neg.")
sum(dat$Superordinate %in% "Anthropomorphism, pos./neg.")
sum(dat$Superordinate %in% "")

# Recode all of these labels to missing in one step.
dat$Superordinate[
  dat$Superordinate %in%
    c("Social impact, pos./neg.", "Anthropomorphism, pos./neg.", "")
] <- NA

# The number of distinct labels should now equal the number of categories ...
length(categories_CG[["Category"]])
length(unique(dat[["Superordinate"]])) # plus missing
Anthropomorphism neg. Anthropomorphism pos. High Cost
1 12 13
HRI, neg. HRI, pos. Low Cost
25 34 2
Mistrust Perceived Usefulness Perceived Uselessness
52 128 24
Rest Category Risk Safety
61 40 22
Social impact neg. Social impact pos. Trust
48 18 16
Apply the same cleaning steps to each data set in the list of rater files
# Apply the same label harmonisation to every rater's data set.
#
# Abbreviated labels that the distance rule would resolve to the WRONG
# category: "Rest" is 2 edits away from "Risk" but 9 edits away from
# "Rest Category", so without this entry a rater's "Rest" ratings would be
# recoded as "Risk". Only targets that exist in the guidelines are kept.
manual_recodes <- c("Rest" = "Rest Category")
manual_recodes <- manual_recodes[manual_recodes %in% categories_CG$Category]

# seq_along() (instead of 1:length()) keeps the loops from running when a
# list or vector is empty.
for (i in seq_along(files_rater)) {
  # remove leading and trailing whitespace
  files_rater[[i]]$Superordinate <-
    str_trim(string = files_rater[[i]]$Superordinate, side = "both")

  # Loop over the rater's distinct labels using approximate matching
  vec_unique <- unique(files_rater[[i]]$Superordinate)
  for (j in seq_along(vec_unique)) {
    label <- vec_unique[j]
    # Missing labels cannot be matched against the guidelines.
    if (is.na(label)) {
      next
    }

    if (label %in% names(manual_recodes)) {
      candidates <- manual_recodes[[label]]
    } else {
      tmp <-
        stringdist::stringdist(a = categories_CG$Category, b = label)
      candidates <- categories_CG$Category[tmp <= 2]
    }
    # cat("j: ", j, "word: ", vec_unique[j], "\n", candidates, "\n\n")

    # Recode only when exactly one category is within an edit distance of 2.
    if (length(candidates) == 1) {
      files_rater[[i]]$Superordinate[files_rater[[i]]$Superordinate == label] <-
        candidates
    }
  }

  # remove 3 ratings manually
  # files_rater[[i]]$Superordinate[files_rater[[i]]$Superordinate == "Social impact, pos./neg."] <-
  #   NA
  # files_rater[[i]]$Superordinate[files_rater[[i]]$Superordinate == "Anthropomorphism, pos./neg."] <-
  #   NA
  # files_rater[[i]]$Superordinate[files_rater[[i]]$Superordinate == ""] <-
  #   NA
}
compute reliability coefficients
### Inter-rater coefficients: Cohen's kappa
# addRatingColum(filesRater = files_rater)
cohensKappas <- computeCohensKappa(
  numberRaters = length(files_rater),
  files = files_rater
)
# Show the coefficients returned for the raters.
cohensKappas
[,1] [,2] [,3] [,4] [,5]
[1,] 1.00000000 0.02912621 0.00000000 0.00000000 0.00000000
[2,] 0.02912621 1.00000000 0.00000000 0.00990099 0.00990099
[3,] 0.00000000 0.00000000 1.00000000 0.01960784 0.00000000
[4,] 0.00000000 0.00990099 0.01960784 1.00000000 0.00990099
[5,] 0.00000000 0.00990099 0.00000000 0.00990099 1.00000000
# Same call pattern as above, using the "maximized" variant of the helper.
cohensKappasMaximized <- computeCohensKappaMaximized(
  numberRaters = length(files_rater),
  files = files_rater
)
# Show the coefficients returned for the raters.
cohensKappasMaximized
[,1] [,2] [,3] [,4] [,5]
[1,] 1.0000000 0.2829132 0.4475774 0.3873383 0.4923259
[2,] 0.2829132 1.0000000 0.2283605 0.2476284 0.2834395
[3,] 0.4002999 0.2283605 1.0000000 0.5204385 0.5094031
[4,] 0.3873383 0.2476284 0.5204385 1.0000000 0.5303551
[5,] 0.4923259 0.2834395 0.5094031 0.5303551 1.0000000
see raw data
# Collect the names of the rating files stored in "data/".
# list.files(path = ...) is used instead of a setwd("data") / setwd("..")
# pair: it returns the same bare file names (full.names defaults to FALSE)
# without changing the working directory, so a failure in between can no
# longer leave the session in the wrong directory.
stopifnot(dir.exists("data"))
tmp_namesRater <- files <- list.files(path = "data", pattern = "ratings")

# Combine the individual rater data sets into one overall table; the file
# names are passed on as rater names.
ovallRaterList <- getOverallRaterList(
  files = files_rater,
  orderAlphabetically = TRUE,
  raterNames = tmp_namesRater
)

# Interactive table: the "Words" column plus every column whose name
# contains "Superordinate".
DT::datatable(
  ovallRaterList[
    ,
    c(
      "Words",
      str_subset(string = colnames(ovallRaterList), pattern = "Superordinate")
    )
  ],
  options = list(pageLength = 5)
)
compute percentage of overlap
# Columns whose name contains "Superordinate"; selected once instead of on
# every loop iteration.
superordinate_cols <- str_subset(
  string = colnames(ovallRaterList),
  pattern = "Superordinate"
)

# For every row, count how often the most frequent label occurs across the
# "Superordinate" columns.
# * seq_len() copes with a table that has zero rows (1:nrow() would iterate
#   over c(1, 0)).
# * table() drops NA values, so a row with only missing labels yields an
#   empty table; such a row gets an overlap of 0 instead of the -Inf (plus
#   warning) that max() returns for empty input.
ovallRaterList$overlapNum <- vapply(
  seq_len(nrow(ovallRaterList)),
  function(row_idx) {
    label_counts <- table(unlist(ovallRaterList[row_idx, superordinate_cols]))
    if (length(label_counts) == 0L) {
      0L
    } else {
      max(label_counts)
    }
  },
  integer(1)
)

## number raters
length(superordinate_cols)

# Distribution of the overlap: absolute counts and percentage of rows.
table(ovallRaterList$overlapNum)
barplot(table(ovallRaterList$overlapNum) / nrow(ovallRaterList) * 100)

# Rows with an overlap of at least 3 ...
DT::datatable(
  ovallRaterList[ovallRaterList$overlapNum >= 3, c("Words", superordinate_cols)],
  options = list(pageLength = 5)
)
# ... and rows with an overlap of at most 2.
DT::datatable(
  ovallRaterList[ovallRaterList$overlapNum <= 2, c("Words", superordinate_cols)],
  options = list(pageLength = 5)
)