# Global knitr chunk options: echo the code in the rendered output and
# suppress warnings and messages emitted by the chunks.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE
)
library(haven)
library(dplyr)
library(ggvenn)
# Load the three source tables from their original SAS files.
# The bare file names are resolved against the current working directory.
bone_newid <- read_sas("bone_newid.sas7bdat")
cis_newid <- read_sas("cis_newid.sas7bdat")
d4_die <- read_sas("d4_die.sas7bdat")
以 d4_die 為研究主體,依 newid 與 Screen_date 排序後,保留每位研究對象最早(earliest)的一筆篩檢紀錄;此步驟不進行任何 ID 篩選,以確保所有個案皆被納入。
# Study base: one row per subject taken from d4_die. Rows are sorted by
# subject and screening date so that distinct() keeps the earliest screening
# record for each newid; no subject is filtered out at this step.
final_base <- d4_die %>%
  arrange(newid, Screen_date) %>%
  distinct(newid, .keep_all = TRUE) %>%
  mutate(
    # death is 0 (alive) when D_date is missing or is not exactly 8 characters
    # long, and 1 (dead) otherwise.
    # NOTE(review): assumes D_date holds 8-character date strings - confirm.
    death = ifelse(
      is.na(D_date) | nchar(as.character(D_date)) != 8,
      0,
      1
    )
  )

# Report the size of the study population.
cat("最終納入分析的研究族群人數 (N):", nrow(final_base), "\n")
## 最終納入分析的研究族群人數 (N): 8222
以 left join 將 bone_newid 與 cis_newid 的變項併入主檔;若個案未參與該項檢測(即在對應資料表中沒有紀錄,或其 bone_newid 紀錄不符合 `_COL3` == 0 的條件),其對應變項將為 NA。
# Variables to keep in the analysis dataset, one vector per source table.

# Columns taken from bone_newid.
# NOTE(review): the _COL* names are generic; their meaning is not visible here.
bone_vars <- c(
  "_COL1", "_COL2", "_COL3", "_COL4", "_COL5", "_COL6",
  "_COL17", "_COL19"
)

# Column taken from cis_newid.
cis_vars <- c("bir_y")

# Columns taken from d4_die. The nested c() calls only group related names;
# the flattened order is unchanged.
d4_vars <- c(
  c("gender", "residence", "marriage", "edu"),
  c("go_out", "visit", "helpful", "conversation", "memory"),
  c("depressed_mood", "anxiety", "stress"),
  # PHQ_1 ... PHQ_9 followed by their total
  c(paste0("PHQ_", 1:9), "PHQ_total"),
  # AD_1 ... AD_8 followed by their total
  c(paste0("AD_", 1:8), "AD_total"),
  c("BH", "BW", "BW_loss_year", "stand_up", "Not_energetic", "grip", "walk_4s"),
  # ADL_1 ... ADL_10 followed by their total
  c(paste0("ADL_", 1:10), "ADL_total"),
  c("e1", "e2", "e3", "e4", "e5", "e8", "e21"),
  c("smoke", "Drinkhabbit", "chewnut", "coffee", "Exercise")
)
# Attach the bone_newid and cis_newid variables to the study base.
# Both lookups are reduced to one row per subject before joining, so the
# left joins keep exactly the rows of final_base; subjects without a match
# get NA in the joined columns. The lookups are built inside local() so they
# do not remain in the workspace.
final_dataset <- local({
  # Keep only bone_newid rows with `_COL3` equal to 0, then the first such row
  # per subject in the table's existing row order.
  # NOTE(review): the meaning of the 0 code is not visible here - confirm.
  bone_lookup <- bone_newid %>%
    filter(`_COL3` == 0) %>%
    select(newid = newidno2, all_of(bone_vars)) %>%
    distinct(newid, .keep_all = TRUE)

  # First cis_newid row per subject; the key is renamed from newidno2 to newid.
  cis_lookup <- cis_newid %>%
    select(newid = newidno2, all_of(cis_vars)) %>%
    distinct(newid, .keep_all = TRUE)

  final_base %>%
    left_join(bone_lookup, by = "newid") %>%
    left_join(cis_lookup, by = "newid")
})
# Final analysis table: identifier, screening date, death date and death flag
# first, followed by the bone, cis and d4 variable sets in that order.
# all_of() makes the selection fail if any listed column is absent.
analysis_dataset <- final_dataset %>%
  select(all_of(c(
    "newid", "Screen_date", "D_date", "death",
    bone_vars,
    cis_vars,
    d4_vars
  )))

# Report the dimensions (rows, columns) of the final dataset.
cat("最終資料集維度:", dim(analysis_dataset), "\n")
## 最終資料集維度: 8222 74