# Chunk defaults for the report: echo the code, hide warnings and messages.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
library(haven)
library(dplyr)
library(ggvenn)
# Read the raw SAS datasets (file paths are relative to the working directory).
d4_die <- read_sas("d4_die.sas7bdat")
cis_newid <- read_sas("cis_newid.sas7bdat")
bone_newid <- read_sas("bone_newid.sas7bdat")

以 d4_die 為研究主體,對每位對象(newid)僅保留篩檢日期(Screen_date)最早(Earliest)的一筆紀錄;不進行任何 ID 篩選,以確保納入所有人。

# Study cohort: one row per newid taken from d4_die. Rows are sorted by
# newid and Screen_date, then only the first occurrence of each newid is
# kept, i.e. the record with the earliest Screen_date. No IDs are excluded.
final_base <- d4_die %>%
  arrange(newid, Screen_date) %>%
  filter(!duplicated(newid)) %>%
  mutate(
    # death is 0 when D_date is missing or its character form is not exactly
    # 8 characters long, and 1 otherwise.
    death = ifelse(
      is.na(D_date) | nchar(as.character(D_date)) != 8,
      0,
      1
    )
  )

cat("最終納入分析的研究族群人數 (N):", nrow(final_base), "\n")
## 最終納入分析的研究族群人數 (N): 8222

補入變項

若個案未參與該項檢測(即在 bone_newid 中沒有 `_COL3` == 0 的紀錄,或在 cis_newid 中沒有對應紀錄),其來自該資料表的變項將為 NA。

# Columns to keep in the analysis dataset, listed per source table.
bone_vars <- paste0("_COL", c(1:6, 17, 19))
cis_vars <- "bir_y"
d4_vars <- c(
  "gender", "residence", "marriage", "edu", "go_out", "visit", "helpful",
  "conversation", "memory", "depressed_mood", "anxiety", "stress",
  sprintf("PHQ_%d", 1:9), "PHQ_total",
  sprintf("AD_%d", 1:8), "AD_total",
  "BH", "BW", "BW_loss_year", "stand_up", "Not_energetic", "grip", "walk_4s",
  sprintf("ADL_%d", 1:10), "ADL_total",
  "e1", "e2", "e3", "e4", "e5", "e8", "e21",
  "smoke", "Drinkhabbit", "chewnut", "coffee", "Exercise"
)

# Attach the bone and CIS variables to the cohort. Both joins are left joins,
# so every row of final_base is kept and subjects without a match get NA in
# the added columns.
final_dataset <- final_base %>%
  left_join(
    # Bone table: keep only rows with `_COL3` == 0, then the first row per ID
    # (in the table's existing row order) so the join cannot duplicate
    # subjects. The ID column newidno2 is matched against newid.
    bone_newid %>%
      filter(`_COL3` == 0) %>%
      distinct(newidno2, .keep_all = TRUE) %>%
      select(newidno2, all_of(bone_vars)),
    by = c("newid" = "newidno2")
  ) %>%
  left_join(
    # CIS table: first row per ID, restricted to the requested columns.
    cis_newid %>%
      distinct(newidno2, .keep_all = TRUE) %>%
      select(newidno2, all_of(cis_vars)),
    by = c("newid" = "newidno2")
  )
# Reduce the joined table to the analysis columns, in a fixed order:
# identifiers and outcome first, then bone, CIS and d4 variables.
analysis_dataset <- final_dataset %>%
  select(all_of(c(
    "newid", "Screen_date", "D_date", "death",
    bone_vars,
    cis_vars,
    d4_vars
  )))

# Report the number of rows and columns of the final dataset.
cat("最終資料集維度:", nrow(analysis_dataset), ncol(analysis_dataset), "\n")
## 最終資料集維度: 8222 74