# ============================================================
# 📘 語彙学習データ分析:完全一括処理版(警告なし / yes-no対応)
# ============================================================

pacman::p_load(
  tidyverse,
  readxl,
  gt,
  ggbeeswarm
)

# ============================================================
# データパス
# ============================================================
file_paths <- list(
  pre_test              = "~/Desktop/Tohoku_UCL/Pre_test.xlsx",
  immediate_form        = "~/Desktop/Tohoku_UCL/Immediate_Form_Recognition.xlsx",
  delayed_form          = "~/Desktop/Tohoku_UCL/Delayed_Form_Recognition.xlsx",
  immediate_meaning_rec = "~/Desktop/Tohoku_UCL/Immediate_Meaning_Recognition.xlsx",
  delayed_meaning_rec   = "~/Desktop/Tohoku_UCL/Delayed_Meaning_Recognition.xlsx",
  immediate_recall      = "~/Desktop/Tohoku_UCL/Immediate_Meaning_Recall.xlsx",
  delayed_recall        = "~/Desktop/Tohoku_UCL/Delayed_Meaning_Recall.xlsx"
)

# ============================================================
# 学習条件抽出
# ============================================================
extract_Condition <- function(participant_id) {
  id <- str_trim(toupper(as.character(participant_id)))
  first_char <- str_sub(id, 1, 1)
  case_when(
    first_char == "B" ~ "RWL",
    first_char == "R" ~ "Reading",
    first_char == "V" ~ "Viewing",
    first_char == "P" ~ "Pilot Study",
    TRUE ~ "Unknown"
  )
}

# ============================================================
# データ整形
# ============================================================
prepare_data <- function(file_path) {
  df <- read_excel(file_path)
  
  # 柔軟にID列を認識
  possible_id_cols <- c("参加者番号", "id", "ID", "被験者番号", "participant_id", "Participant")
  id_col <- intersect(possible_id_cols, names(df))
  if (length(id_col) > 0) df <- df %>% rename(id = !!sym(id_col[1]))
  
  # 柔軟に条件列を認識
  possible_cond_cols <- c("条件", "condition", "Condition")
  cond_col <- intersect(possible_cond_cols, names(df))
  has_condition <- length(cond_col) > 0
  if (has_condition) df <- df %>% rename(condition_col = !!sym(cond_col[1]))
  
  # participant_id / Condition を生成
  df <- df %>%
    mutate(
      participant_id = str_trim(toupper(as.character(id))),
      Condition = if (has_condition) as.character(df$condition_col) else sapply(participant_id, extract_Condition),
      Condition = str_trim(str_to_title(Condition)),   # 正規化
      Condition = case_when(
        Condition %in% c("Reading") ~ "Reading",
        Condition %in% c("Rwl", "B") ~ "RWL",
        Condition %in% c("Viewing") ~ "Viewing",
        TRUE ~ Condition
      )
    ) %>%
    filter(!Condition %in% c("Pilot Study", "Unknown"))
  
  return(df)
}

# ============================================================
# 語彙リストと正答表
# ============================================================
target_words <- c(
  "subsidence", "vent", "rubble", "basalt", "torrent",
  "conflagration", "tremor", "drought", "embankment",
  "vortex", "reservoir", "combustion", "levee",
  "subduction", "famine", "epicenter", "deluge", "outage"
)

correct_answers <- c(
  alarm = "警報", barriers = "障壁", basalt = "玄武岩", coast = "海岸",
  combustion = "燃焼", communities = "近隣", conflagration = "大火",
  deluge = "大洪水", drought = "干ばつ", embankment = "土手",
  epicenter = "震源地", event = "出来事", experts = "専門家",
  famine = "飢饉", levee = "堤防", outage = "停止", path = "道",
  region = "地域", reservoir = "貯水池", route = "経路", rubble = "瓦礫",
  soil = "土", subduction = "沈み込み", subsidence = "沈下",
  technology = "技術", torrent = "激流", tremor = "震動",
  vent = "通気孔", villages = "村", vortex = "渦"
)

# ============================================================
# 採点関数群
# ============================================================

# --- Pre-test ---
score_pretest <- function(data, word_list, word_type) {
  data %>%
    select(名前, participant_id, Condition, all_of(word_list)) %>%
    pivot_longer(cols = all_of(word_list),
                 names_to = "word", values_to = "response") %>%
    mutate(
      response_num = case_when(
        response == "知っている" ~ 1,
        response == "知らない" ~ 0,
        TRUE ~ NA_real_
      )
    ) %>%
    group_by(名前, participant_id, Condition) %>%
    summarise(score = sum(response_num, na.rm = TRUE), .groups = "drop") %>%
    mutate(word_type = word_type)
}

# --- Meaning Recognition ---
score_recognition <- function(data, word_list, word_type) {
  data %>%
    select(名前, participant_id, Condition, all_of(word_list)) %>%
    pivot_longer(cols = all_of(word_list),
                 names_to = "word", values_to = "response") %>%
    mutate(
      response = str_trim(as.character(response)),
      correct = correct_answers[word],
      is_correct = response == correct
    ) %>%
    group_by(名前, participant_id, Condition) %>%
    summarise(score = sum(is_correct, na.rm = TRUE), .groups = "drop") %>%
    mutate(word_type = word_type)
}

# --- Meaning Recall ---
score_recall <- function(data, word_list, word_type) {
  data %>%
    select(名前, participant_id, Condition, all_of(word_list)) %>%
    pivot_longer(cols = all_of(word_list),
                 names_to = "word", values_to = "response") %>%
    mutate(
      response = str_trim(as.character(response)),
      correct = correct_answers[word],
      is_correct = response == correct
    ) %>%
    group_by(名前, participant_id, Condition) %>%
    summarise(score = sum(is_correct, na.rm = TRUE), .groups = "drop") %>%
    mutate(word_type = word_type)
}

# --- ✅ Form Recognition(yes/no型)---
score_form_recognition <- function(data, word_list, word_type) {
  data %>%
    filter(word %in% word_list) %>%
    mutate(
      answer = str_trim(tolower(as.character(answer))),
      score_value = case_when(
        answer == "yes" ~ 1,
        answer == "no"  ~ 0,
        TRUE ~ NA_real_
      )
    ) %>%
    group_by(participant_id, Condition) %>%
    summarise(score = sum(score_value, na.rm = TRUE), .groups = "drop") %>%
    mutate(word_type = word_type)
}

# ============================================================
# 記述統計表(N, M, SD)
# ============================================================
summary_by_condition <- function(df) {
  df %>%
    group_by(Condition) %>%
    summarise(
      N = n(),
      M = mean(score, na.rm = TRUE),
      SD = sd(score, na.rm = TRUE),
      .groups = "drop"
    ) %>%
    mutate(
      Condition = factor(Condition, levels = c("Reading", "RWL", "Viewing"))
    ) %>%
    arrange(Condition) %>%
    gt() %>%
    fmt_number(columns = c(M, SD), decimals = 2)
}

# ============================================================
# グラフ
# ============================================================
plot_box <- function(df) {
  ggplot(df, aes(x = Condition, y = score, fill = Condition)) +
    geom_boxplot(outlier.shape = NA) +
    geom_quasirandom(alpha = 0.7, color = "black", width = 0.2) +
    stat_summary(fun = mean, geom = "point", shape = 23, size = 3, fill = "yellow") +
    scale_fill_manual(values = c(
      "Reading" = "salmon",
      "RWL" = "green",
      "Viewing" = "deepskyblue"
    )) +
    labs(x = "Condition", y = "Score") +
    theme_minimal(base_size = 14) +
    theme(plot.title = element_blank(), legend.position = "none")
}

# ============================================================
# 実行処理
# ============================================================

## --- Pre-test
pre_data <- prepare_data(file_paths$pre_test)
pre_target <- score_pretest(pre_data, target_words, "Target")
plot_box(pre_target); summary_by_condition(pre_target)

Condition N M SD
Reading 7 2.86 1.21
RWL 8 3.00 2.62
Viewing 8 3.25 2.31
## --- Form Recognition(yes/no)
immediate_form <- prepare_data(file_paths$immediate_form)
delayed_form   <- prepare_data(file_paths$delayed_form)
immediate_form_target <- score_form_recognition(immediate_form, target_words, "Target")
delayed_form_target   <- score_form_recognition(delayed_form, target_words, "Target")
plot_box(immediate_form_target); summary_by_condition(immediate_form_target)

Condition N M SD
Reading 6 16.17 1.72
RWL 7 14.00 5.03
Viewing 7 14.57 2.76
plot_box(delayed_form_target); summary_by_condition(delayed_form_target)

Condition N M SD
Reading 2 12.00 1.41
RWL 3 11.33 7.37
Viewing 3 15.00 3.61
## --- Meaning Recognition
immediate_meaning_rec <- prepare_data(file_paths$immediate_meaning_rec)
delayed_meaning_rec   <- prepare_data(file_paths$delayed_meaning_rec)
immediate_meaning_rec_target <- score_recognition(immediate_meaning_rec, target_words, "Target")
delayed_meaning_rec_target   <- score_recognition(delayed_meaning_rec, target_words, "Target")
plot_box(immediate_meaning_rec_target); summary_by_condition(immediate_meaning_rec_target)

Condition N M SD
Reading 8 5.75 1.91
RWL 8 7.38 5.58
Viewing 8 5.62 4.96
plot_box(delayed_meaning_rec_target); summary_by_condition(delayed_meaning_rec_target)

Condition N M SD
Reading 3 6.33 2.31
RWL 4 6.25 4.72
Viewing 4 9.75 7.14
## --- Meaning Recall
immediate_recall <- prepare_data(file_paths$immediate_recall)
delayed_recall   <- prepare_data(file_paths$delayed_recall)
immediate_recall_target <- score_recall(immediate_recall, target_words, "Target")
delayed_recall_target   <- score_recall(delayed_recall, target_words, "Target")
plot_box(immediate_recall_target); summary_by_condition(immediate_recall_target)

Condition N M SD
Reading 8 1.00 1.07
RWL 8 1.50 1.41
Viewing 8 1.00 1.77
plot_box(delayed_recall_target); summary_by_condition(delayed_recall_target)

Condition N M SD
Reading 3 1.33 0.58
RWL 4 2.25 2.63
Viewing 4 2.75 4.27