資料預處理摘要: 本研究以 d4_die 原始資料庫與 cis_newid 資料庫之交集個案作為研究主體,確保每位個案皆具備出生年資歷。針對重複篩檢者,統一保留其最早(Earliest)之一筆紀錄作為基準點。骨密度資料(bone_newid 且年齡滿 65 歲者)則採補件方式併入,若個案缺乏骨密度檢測資料,相關欄位以缺失值(NA)處理,不影響個案之納入。最終分析族群總人數為 4,354 人。
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
library(haven)
library(dplyr)
library(ggvenn)
# 從原始 SAS 檔案中讀取資料
d4_die <- read_sas("d4_die.sas7bdat")
cis_newid <- read_sas("cis_newid.sas7bdat")
bone_newid <- read_sas("bone_newid.sas7bdat")
出現在 bone_newid 且 _COL3 == 0 (滿 65 歲)。 出現在 cis_newid 資料中。
# 保留滿 65 歲的人,並轉化欄位名稱以利對接
bone65 <- bone_newid %>%
filter(`_COL3` == 0) %>%
select(newid = newidno2) %>%
distinct()
# 提取 CIS 資料的唯一 ID
cis_id <- cis_newid %>%
select(newid = newidno2) %>%
distinct()
# 取聯集 (Union):只要符合其中一個條件就納入
#all_target_ids <- bind_rows(bone65, cis_id) %>%
all_target_ids <- cis_id %>%
distinct(newid)
cat("符合納入標準 (Bone 65+ 或 CIS) 的總人數:", nrow(all_target_ids), "\n")
## 符合納入標準 (Bone 65+ 或 CIS) 的總人數: 14378
以 d4_die 為主體進行合併,並僅保留符合上述 ID 清單的人員。 篩選:僅保留屬於上述目標 ID 清單的人。
去重複:先按 newid 與 Screen_date 排序,保留時間上最遠(最早)的一筆資料作為分析基準。。
final_base <- d4_die %>%
filter(newid %in% all_target_ids$newid) %>%
arrange(newid, Screen_date) %>%
distinct(newid, .keep_all = TRUE) %>%
mutate(death = ifelse(!is.na(D_date) & nchar(as.character(D_date)) == 8, 1, 0)) #D_date 不為空值且字元數等於 8 為死亡 (1),其餘為存活 (0)
cat("最終納入分析的研究族群人數 (N):", nrow(final_base), "\n")
## 最終納入分析的研究族群人數 (N): 400
將 bone_newid (滿65歲) 與 cis_newid 的特定欄位併入主表。
# 定義欲保留的變項
bone_vars <- c("_COL1","_COL2","_COL3","_COL4","_COL5","_COL6","_COL17","_COL19")
cis_vars <- c("bir_y")
d4_vars <- c("gender","residence","marriage","edu","go_out","visit","helpful",
"conversation","memory","depressed_mood","anxiety","stress",
paste0("PHQ_", 1:9), "PHQ_total", paste0("AD_", 1:8), "AD_total",
"BH","BW","BW_loss_year","stand_up","Not_energetic","grip","walk_4s",
paste0("ADL_", 1:10), "ADL_total","e1","e2","e3","e4","e5","e8","e21",
"smoke","Drinkhabbit","chewnut","coffee","Exercise")
final_dataset <- final_base %>%
left_join(
bone_newid %>%
filter(`_COL3` == 0) %>%
select(newid = newidno2, all_of(bone_vars)) %>%
distinct(newid, .keep_all = TRUE),
by = "newid"
) %>%
left_join(
cis_newid %>%
select(newid = newidno2, all_of(cis_vars)) %>%
distinct(newid, .keep_all = TRUE),
by = "newid"
)
analysis_dataset <- final_dataset %>%
select(
newid, Screen_date, D_date, death,
all_of(bone_vars),
all_of(cis_vars),
all_of(d4_vars)
)
cat("最終資料集維度:", dim(analysis_dataset), "\n")
## 最終資料集維度: 400 74
venn_list <- list(
"d4_die" = d4_die$newid,
"Bone 65+" = bone65$newid,
"CIS" = cis_id$newid
)
ggvenn(venn_list, fill_color = c("#0073C2FF", "#EFC000FF", "#868686FF"), text_size = 3)