# ============================================================
# 📘 語彙学習データ分析:完全一括処理版(警告なし / yes-no対応)
# ============================================================
pacman::p_load(
tidyverse,
readxl,
gt,
ggbeeswarm
)
# ============================================================
# データパス
# ============================================================
file_paths <- list(
pre_test = "~/Desktop/Tohoku_UCL/Pre_test.xlsx",
immediate_form = "~/Desktop/Tohoku_UCL/Immediate_Form_Recognition.xlsx",
delayed_form = "~/Desktop/Tohoku_UCL/Delayed_Form_Recognition.xlsx",
immediate_meaning_rec = "~/Desktop/Tohoku_UCL/Immediate_Meaning_Recognition.xlsx",
delayed_meaning_rec = "~/Desktop/Tohoku_UCL/Delayed_Meaning_Recognition.xlsx",
immediate_recall = "~/Desktop/Tohoku_UCL/Immediate_Meaning_Recall.xlsx",
delayed_recall = "~/Desktop/Tohoku_UCL/Delayed_Meaning_Recall.xlsx"
)
# ============================================================
# 学習条件抽出
# ============================================================
extract_Condition <- function(participant_id) {
id <- str_trim(toupper(as.character(participant_id)))
first_char <- str_sub(id, 1, 1)
case_when(
first_char == "B" ~ "RWL",
first_char == "R" ~ "Reading",
first_char == "V" ~ "Viewing",
first_char == "P" ~ "Pilot Study",
TRUE ~ "Unknown"
)
}
# ============================================================
# データ整形
# ============================================================
prepare_data <- function(file_path) {
df <- read_excel(file_path)
# 柔軟にID列を認識
possible_id_cols <- c("参加者番号", "id", "ID", "被験者番号", "participant_id", "Participant")
id_col <- intersect(possible_id_cols, names(df))
if (length(id_col) > 0) df <- df %>% rename(id = !!sym(id_col[1]))
# 柔軟に条件列を認識
possible_cond_cols <- c("条件", "condition", "Condition")
cond_col <- intersect(possible_cond_cols, names(df))
has_condition <- length(cond_col) > 0
if (has_condition) df <- df %>% rename(condition_col = !!sym(cond_col[1]))
# participant_id / Condition を生成
df <- df %>%
mutate(
participant_id = str_trim(toupper(as.character(id))),
Condition = if (has_condition) as.character(df$condition_col) else sapply(participant_id, extract_Condition),
Condition = str_trim(str_to_title(Condition)), # 正規化
Condition = case_when(
Condition %in% c("Reading") ~ "Reading",
Condition %in% c("Rwl", "B") ~ "RWL",
Condition %in% c("Viewing") ~ "Viewing",
TRUE ~ Condition
)
) %>%
filter(!Condition %in% c("Pilot Study", "Unknown"))
return(df)
}
# ============================================================
# 語彙リストと正答表
# ============================================================
target_words <- c(
"subsidence", "vent", "rubble", "basalt", "torrent",
"conflagration", "tremor", "drought", "embankment",
"vortex", "reservoir", "combustion", "levee",
"subduction", "famine", "epicenter", "deluge", "outage"
)
correct_answers <- c(
alarm = "警報", barriers = "障壁", basalt = "玄武岩", coast = "海岸",
combustion = "燃焼", communities = "近隣", conflagration = "大火",
deluge = "大洪水", drought = "干ばつ", embankment = "土手",
epicenter = "震源地", event = "出来事", experts = "専門家",
famine = "飢饉", levee = "堤防", outage = "停止", path = "道",
region = "地域", reservoir = "貯水池", route = "経路", rubble = "瓦礫",
soil = "土", subduction = "沈み込み", subsidence = "沈下",
technology = "技術", torrent = "激流", tremor = "震動",
vent = "通気孔", villages = "村", vortex = "渦"
)
# ============================================================
# 採点関数群
# ============================================================
# --- Pre-test ---
score_pretest <- function(data, word_list, word_type) {
data %>%
select(名前, participant_id, Condition, all_of(word_list)) %>%
pivot_longer(cols = all_of(word_list),
names_to = "word", values_to = "response") %>%
mutate(
response_num = case_when(
response == "知っている" ~ 1,
response == "知らない" ~ 0,
TRUE ~ NA_real_
)
) %>%
group_by(名前, participant_id, Condition) %>%
summarise(score = sum(response_num, na.rm = TRUE), .groups = "drop") %>%
mutate(word_type = word_type)
}
# --- Meaning Recognition ---
score_recognition <- function(data, word_list, word_type) {
data %>%
select(名前, participant_id, Condition, all_of(word_list)) %>%
pivot_longer(cols = all_of(word_list),
names_to = "word", values_to = "response") %>%
mutate(
response = str_trim(as.character(response)),
correct = correct_answers[word],
is_correct = response == correct
) %>%
group_by(名前, participant_id, Condition) %>%
summarise(score = sum(is_correct, na.rm = TRUE), .groups = "drop") %>%
mutate(word_type = word_type)
}
# --- Meaning Recall ---
score_recall <- function(data, word_list, word_type) {
data %>%
select(名前, participant_id, Condition, all_of(word_list)) %>%
pivot_longer(cols = all_of(word_list),
names_to = "word", values_to = "response") %>%
mutate(
response = str_trim(as.character(response)),
correct = correct_answers[word],
is_correct = response == correct
) %>%
group_by(名前, participant_id, Condition) %>%
summarise(score = sum(is_correct, na.rm = TRUE), .groups = "drop") %>%
mutate(word_type = word_type)
}
# --- ✅ Form Recognition(yes/no型)---
score_form_recognition <- function(data, word_list, word_type) {
data %>%
filter(word %in% word_list) %>%
mutate(
answer = str_trim(tolower(as.character(answer))),
score_value = case_when(
answer == "yes" ~ 1,
answer == "no" ~ 0,
TRUE ~ NA_real_
)
) %>%
group_by(participant_id, Condition) %>%
summarise(score = sum(score_value, na.rm = TRUE), .groups = "drop") %>%
mutate(word_type = word_type)
}
# ============================================================
# 記述統計表(N, M, SD)
# ============================================================
summary_by_condition <- function(df) {
df %>%
group_by(Condition) %>%
summarise(
N = n(),
M = mean(score, na.rm = TRUE),
SD = sd(score, na.rm = TRUE),
.groups = "drop"
) %>%
mutate(
Condition = factor(Condition, levels = c("Reading", "RWL", "Viewing"))
) %>%
arrange(Condition) %>%
gt() %>%
fmt_number(columns = c(M, SD), decimals = 2)
}
# ============================================================
# グラフ
# ============================================================
plot_box <- function(df) {
ggplot(df, aes(x = Condition, y = score, fill = Condition)) +
geom_boxplot(outlier.shape = NA) +
geom_quasirandom(alpha = 0.7, color = "black", width = 0.2) +
stat_summary(fun = mean, geom = "point", shape = 23, size = 3, fill = "yellow") +
scale_fill_manual(values = c(
"Reading" = "salmon",
"RWL" = "green",
"Viewing" = "deepskyblue"
)) +
labs(x = "Condition", y = "Score") +
theme_minimal(base_size = 14) +
theme(plot.title = element_blank(), legend.position = "none")
}
# ============================================================
# 実行処理
# ============================================================
## --- Pre-test
pre_data <- prepare_data(file_paths$pre_test)
pre_target <- score_pretest(pre_data, target_words, "Target")
plot_box(pre_target); summary_by_condition(pre_target)

| Condition |
N |
M |
SD |
| Reading |
7 |
2.86 |
1.21 |
| RWL |
8 |
3.00 |
2.62 |
| Viewing |
8 |
3.25 |
2.31 |
## --- Form Recognition(yes/no)
immediate_form <- prepare_data(file_paths$immediate_form)
delayed_form <- prepare_data(file_paths$delayed_form)
immediate_form_target <- score_form_recognition(immediate_form, target_words, "Target")
delayed_form_target <- score_form_recognition(delayed_form, target_words, "Target")
plot_box(immediate_form_target); summary_by_condition(immediate_form_target)

| Condition |
N |
M |
SD |
| Reading |
6 |
16.17 |
1.72 |
| RWL |
7 |
14.00 |
5.03 |
| Viewing |
7 |
14.57 |
2.76 |
plot_box(delayed_form_target); summary_by_condition(delayed_form_target)

| Condition |
N |
M |
SD |
| Reading |
2 |
12.00 |
1.41 |
| RWL |
3 |
11.33 |
7.37 |
| Viewing |
3 |
15.00 |
3.61 |
## --- Meaning Recognition
immediate_meaning_rec <- prepare_data(file_paths$immediate_meaning_rec)
delayed_meaning_rec <- prepare_data(file_paths$delayed_meaning_rec)
immediate_meaning_rec_target <- score_recognition(immediate_meaning_rec, target_words, "Target")
delayed_meaning_rec_target <- score_recognition(delayed_meaning_rec, target_words, "Target")
plot_box(immediate_meaning_rec_target); summary_by_condition(immediate_meaning_rec_target)

| Condition |
N |
M |
SD |
| Reading |
8 |
5.75 |
1.91 |
| RWL |
8 |
7.38 |
5.58 |
| Viewing |
8 |
5.62 |
4.96 |
plot_box(delayed_meaning_rec_target); summary_by_condition(delayed_meaning_rec_target)

| Condition |
N |
M |
SD |
| Reading |
3 |
6.33 |
2.31 |
| RWL |
4 |
6.25 |
4.72 |
| Viewing |
4 |
9.75 |
7.14 |
## --- Meaning Recall
immediate_recall <- prepare_data(file_paths$immediate_recall)
delayed_recall <- prepare_data(file_paths$delayed_recall)
immediate_recall_target <- score_recall(immediate_recall, target_words, "Target")
delayed_recall_target <- score_recall(delayed_recall, target_words, "Target")
plot_box(immediate_recall_target); summary_by_condition(immediate_recall_target)

| Condition |
N |
M |
SD |
| Reading |
8 |
1.00 |
1.07 |
| RWL |
8 |
1.50 |
1.41 |
| Viewing |
8 |
1.00 |
1.77 |
plot_box(delayed_recall_target); summary_by_condition(delayed_recall_target)

| Condition |
N |
M |
SD |
| Reading |
3 |
1.33 |
0.58 |
| RWL |
4 |
2.25 |
2.63 |
| Viewing |
4 |
2.75 |
4.27 |