# 1. Load the Taiwan and Indonesia speech measurements from Excel.
#    The tone column is named differently in each workbook (`Tones2` vs.
#    `Tone2`), so both are renamed to `Tone` to give the tables a shared name.
data_tw <- rename(
  read_excel("Taiwan_Result_1141125.xlsx"),
  Tone = Tones2
)

data_id <- rename(
  read_excel("indonesia_Result_1141202.xlsx"),
  Tone = Tone2
)

# 2. Stack both datasets and compute the mean fundamental frequency (F0)
#    at every time point, separately for each region and tone.
summary_data <- data_tw %>%
  bind_rows(data_id) %>%
  select(region, Tone, `0%V`:`100%V`) %>%
  # Wide -> long: one row per measurement. The "%V" suffix is stripped from
  # the former column names so TimePoint is numeric (e.g. "0%V" -> 0).
  pivot_longer(
    cols = ends_with("%V"),
    names_to = "TimePoint",
    values_to = "F0",
    names_transform = list(
      TimePoint = function(label) as.numeric(gsub("%V", "", label))
    )
  ) %>%
  group_by(region, Tone, TimePoint) %>%
  # Missing F0 values are ignored; the summary is returned ungrouped.
  summarise(Mean_F0 = mean(F0, na.rm = TRUE), .groups = "drop")
# 3. Plot the mean F0 contour of each tone, one panel per tone, with
#    regions distinguished by both line colour and line type.
ggplot(
  summary_data,
  aes(TimePoint, Mean_F0, colour = region, linetype = region)
) +
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  facet_wrap(vars(Tone)) +
  # Tick marks every 10 units from 0 to 100 on the time axis.
  scale_x_continuous(breaks = seq(from = 0, to = 100, by = 10)) +
  labs(
    x = "發音時間比例 (%)",
    y = "平均基頻 F0 (Hz)",
    title = "華語四聲調基頻曲線對比 (台灣母語者 vs 印尼學習者)"
  ) +
  theme_minimal()