# Plot ISC by words
# This code was generated by ChatGPT, adapted from the PsychoPy version I had before.
library(tidyverse)
library(stringr)
# Input locations (relative to the working directory)
word_order_path <- "preprocessed/word_order.csv"
isc_path <- "results/step1_subject_bootstrap_stats.csv"
experiment_js <- "experiment.js"

# ---------- 1. zh → en mapping from experiment.js ----------
# Read the whole JS file into a single string.
js_txt <- read_file(file = experiment_js)

# Entries of interest look like { zh: "....", en: "...." }.
# str_match_all() returns one matrix per input string; column 1 is the full
# match, column 2 the first capture group (zh), column 3 the second (en).
pairs <- str_match_all(
  string = js_txt,
  pattern = "\\{\\s*zh:\\s*\"([^\"]+)\"\\s*,\\s*en:\\s*\"([^\"]+)\"\\s*\\}"
)[[1L]]

# Lookup vector: names = zh, values = en
cn2en <- pairs[, 3]
names(cn2en) <- pairs[, 2]
# ---------- 2. Category definitions (Chinese) ----------
# Stimulus words grouped by semantic category; get_category() below looks
# words up in these vectors.
animals_zh <- c("蚂蚁","猫","大象","长颈鹿","熊猫","兔子","老鼠","麻雀","老虎","乌龟")
body_parts_zh <- c("脚踝","胳膊","耳朵","眼睛","手指","膝盖","嘴唇","鼻子","肩膀","大腿")
artifacts_zh <- c(
  "空调","斧头","床","扫帚","柜子","椅子","筷子","鼠标","锤子","钥匙",
  "微波炉","铅笔","冰箱","剪刀","沙发","勺子","桌子","电视","牙刷","洗衣机"
)
emotional_zh <- c(
  "愤怒","反感","冷漠","慈善","舒心","死亡","债务","沮丧","疾病","纠纷",
  "错误","兴奋","缘分","过失","恐惧","骗局","友情","快乐","天堂","敌意",
  "爱心","魔力","婚姻","奇迹","骄傲","难过","风景","光彩","创伤","暴力"
)
nonemotional_zh <- c(
  "协议","买卖","性质","概念","内容","数据","纪律","作用","身份","方法",
  "义务","现象","过程","原因","关系","结果","社会","地位","制度","团队"
)

# Map Chinese stimulus words to their category label.
#
# Vectorized: `w` may be a single word or a vector of words (including a
# zero-length vector). Words that appear in none of the lists, and NA, map to
# "Unknown".
#
# Args:
#   w: vector of Chinese words.
# Returns:
#   Unnamed character vector of the same length as `w`, each element one of
#   "Animal", "Face/Body Part", "Artifact", "Emotional Nonobject",
#   "Nonemotional Nonobject" or "Unknown".
get_category <- function(w) {
  # List order is the lookup priority: match() returns the first hit, so a word
  # listed under several categories gets the earliest one.
  category_words <- list(
    "Animal" = animals_zh,
    "Face/Body Part" = body_parts_zh,
    "Artifact" = artifacts_zh,
    "Emotional Nonobject" = emotional_zh,
    "Nonemotional Nonobject" = nonemotional_zh
  )
  all_words <- unlist(category_words, use.names = FALSE)
  all_labels <- rep(names(category_words), times = lengths(category_words))

  hit <- match(w, all_words)
  out <- all_labels[hit]
  out[is.na(hit)] <- "Unknown"
  out
}
# Fill colour (hex) per category label, used for the manual fill scale of the
# plot. Names are the labels produced by get_category().
category_colors <- setNames(
  c("#b2182b", "#ef8a62", "#fddbc7", "#4393c3", "#2166ac", "#999999"),
  c(
    "Animal",
    "Face/Body Part",
    "Artifact",
    "Emotional Nonobject",
    "Nonemotional Nonobject",
    "Unknown"
  )
)
# ---------- 3. Load word order + ISC stats ----------
# Word list; word_index is the zero-based row position, because the python
# side used reset_index() starting at 0.
word_order <- read_csv(word_order_path, show_col_types = FALSE) |>
  transmute(
    word_index = row_number() - 1L,
    word_zh = word
  )

# Per-word ISC statistics; word_index should be 0..89
step1 <- read_csv(isc_path, show_col_types = FALSE)
# str(step1)

# Attach the Chinese word to every stats row, then derive the English label
# (NA when the word is absent from cn2en) and the category.
df <- left_join(step1, word_order, by = "word_index") |>
  mutate(
    word_en = cn2en[word_zh],
    category = vapply(word_zh, get_category, character(1))
  )
# sanity checks
# Rows for which no English translation was found in the cn2en lookup
missing_en <- df |>
  filter(is.na(word_en)) |>
  select(word_index, word_zh)
if (nrow(missing_en) > 0) {
  message("Some words missing English translation in experiment.js:")
  print(missing_en)
}

# Rows whose word is in none of the category lists
unknown_cat <- df |>
  filter(category == "Unknown") |>
  select(word_index, word_zh, word_en)
if (nrow(unknown_cat) > 0) {
  message("Some words did not match any category:")
  print(unknown_cat)
}
# ---------- 4. Sort by mean ISC (Fisher-z) ----------
# step1 columns from python: mean, std_err, ci_2.5, ci_97.5, p_value, word_index
df_sorted <- df |>
  arrange(desc(mean)) |>
  mutate(
    # Prefer the English label, then the Chinese word, then a placeholder built
    # from word_index, so that no label is NA. unname() drops the names that
    # word_en carries over from the cn2en lookup.
    label_en = coalesce(unname(word_en), word_zh, paste0("word_", word_index)),
    # factor() raises an error on duplicated levels, so repeated labels
    # (e.g. two Chinese words sharing one English translation) get a numeric
    # suffix ("x", "x.1", ...) instead of aborting the script.
    label_en = make.unique(label_en),
    # fix factor order = sorted order
    label_en = factor(label_en, levels = label_en)
  )
# ---------- 5. Plot (similar to matplotlib version) ----------
# One bar per word, in the order fixed by the label_en factor, filled by
# category; the ci_2.5 / ci_97.5 columns are drawn as error bars.
ggplot(
  data = df_sorted,
  mapping = aes(x = label_en, y = mean, fill = category)
) +
  geom_col(linewidth = 0.3, color = "black") +
  geom_errorbar(
    mapping = aes(ymin = `ci_2.5`, ymax = `ci_97.5`),
    width = 0.3
  ) +
  scale_fill_manual(values = category_colors) +
  # Lower y limit fixed at 0; upper limit left as NA
  coord_cartesian(ylim = c(0, NA)) +
  labs(
    title = "Word-level ISC (Fisher-z)",
    x = "Words (sorted by ISC)",
    y = "Fisher-transformed ISC",
    fill = "Category"
  ) +
  theme_minimal(base_size = 11) +
  theme(
    # No vertical grid lines; word labels rotated to read bottom-to-top
    panel.grid.major.x = element_blank(),
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  )