# Global knitr chunk options: echo the code, and keep warnings and messages
# out of the rendered document.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE
)
library(haven)
library(dplyr)
library(ggvenn)

# Read the three raw SAS datasets (paths are relative to the working
# directory).
d4_die <- read_sas("d4_die.sas7bdat")
cis_newid <- read_sas("cis_newid.sas7bdat")
bone_newid <- read_sas("bone_newid.sas7bdat")

# Reduce each dataset to a one-column table of distinct IDs (one row per ID).
# The ID column is `newid` in d4_die and `newidno2` in the other two; it is
# renamed to `newid` so that all three tables share the same join key.
A <- distinct(d4_die, newid)
B <- distinct(bone_newid, newid = newidno2)
C <- distinct(cis_newid, newid = newidno2)

# --- Sizes of the 7 disjoint Venn regions ---

# Region A ∩ B ∩ C: IDs present in all three datasets.
# semi_join() keeps the rows of the left table that have a match in the right
# table, so chaining two of them leaves the IDs of A found in both B and C.
d4_bone_cis <- nrow(
  A %>%
    semi_join(B, by = "newid") %>%
    semi_join(C, by = "newid")
)

# Regions shared by exactly two datasets. Each count keeps the IDs found in
# both named tables (semi_join) and then drops those that also appear in the
# third table (anti_join), so the triple overlap is not counted here.

# (A ∩ B) \ C: in d4_die and bone_newid, but not in cis_newid.
d4_bone_only <- nrow(
  A %>%
    semi_join(B, by = "newid") %>%
    anti_join(C, by = "newid")
)

# (A ∩ C) \ B: in d4_die and cis_newid, but not in bone_newid.
d4_cis_only <- nrow(
  A %>%
    semi_join(C, by = "newid") %>%
    anti_join(B, by = "newid")
)

# (B ∩ C) \ A: in bone_newid and cis_newid, but not in d4_die.
bone_cis_only <- nrow(
  B %>%
    semi_join(C, by = "newid") %>%
    anti_join(A, by = "newid")
)

# Regions exclusive to a single dataset: start from one table and remove
# every ID that also appears in either of the other two.

# A \ (B ∪ C): only in d4_die.
d4_only <- nrow(
  A %>%
    anti_join(C, by = "newid") %>%
    anti_join(B, by = "newid")
)

# B \ (A ∪ C): only in bone_newid.
bone_only <- nrow(
  B %>%
    anti_join(C, by = "newid") %>%
    anti_join(A, by = "newid")
)

# C \ (A ∪ B): only in cis_newid.
cis_only <- nrow(
  C %>%
    anti_join(B, by = "newid") %>%
    anti_join(A, by = "newid")
)
# Summary table of the 7 Venn regions: one row per region, with the label in
# the first column and the number of IDs in the second. Labels and counts are
# listed in the same order. The three pairwise rows hold the IDs found in
# exactly those two datasets (the triple overlap has its own row).
venn_summary <- data.frame(
  區塊名稱 = c(
    "僅在 d4_die (A)",
    "僅在 bone_newid (B)",
    "僅在 cis_newid (C)",
    "d4 & bone 共有",
    "d4 & cis 共有",
    "bone & cis 共有",
    "三者皆共有"
  ),
  人數_N = c(
    d4_only,
    bone_only,
    cis_only,
    d4_bone_only,
    d4_cis_only,
    bone_cis_only,
    d4_bone_cis
  )
)

# Render the summary as a captioned table in the knitted document.
knitr::kable(
  venn_summary,
  caption = "文氏圖 7 個區塊人數統計"
)
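# Output of the kable() call above, kept for reference:
#
# 文氏圖 7 個區塊人數統計
#
# | 區塊名稱 | 人數_N |
# |:---|---:|
# | 僅在 d4_die (A) | 3867 |
# | 僅在 bone_newid (B) | 7131 |
# | 僅在 cis_newid (C) | 10518 |
# | d4 & bone 共有 | 3955 |
# | d4 & cis 共有 | 211 |
# | bone & cis 共有 | 3460 |
# | 三者皆共有 | 189 |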
# Named list of ID sets for the Venn diagram, one element per dataset.
# The sets are taken from the deduplicated tables A, B and C -- the same
# one-row-per-ID tables the region counts are computed from -- so the figure
# and the summary table are derived from identical inputs, and ggvenn() is not
# handed the raw columns, which may repeat an ID many times.
venn_list <- list(
  "d4_die"     = A$newid,
  "bone_newid" = B$newid,
  "cis_newid"  = C$newid
)

# Draw the three-set Venn diagram.
ggvenn(
  venn_list,
  columns = c("d4_die", "bone_newid", "cis_newid"),       # sets to draw
  fill_color = c("#0073C2FF", "#EFC000FF", "#868686FF"),  # one fill per set
  stroke_size = 0.5,       # outline width of the circles
  set_name_size = 4,       # text size of the set names
  text_size = 3.5,         # text size of the numbers inside each region
  show_percentage = TRUE   # show each region's percentage next to its count
)