pacman::p_load(tidyverse, lubridate, tableone, tictoc,
survival, survey, survminer, car, svglite)
df_PEO_total <-
read_csv("MIMIC_HDS.csv") |>
mutate(GENDER = ifelse(GENDER == "M", 1, 0))
df_PEO_total
col_continuous <-
c("age", "height", "weight", "BMI",
"heartrate", "sysbp", "diasbp", "meanbp", "resprate", "tempc", "spo2",
"urine_out","SOFA", "apsiii",
"Calcium", "Cre", "Glucose", "Mg", "Sodium", "BUN", "Plate", "RBC", "WBC")
col_factors <-
c("ADMISSION_TYPE", "INSURANCE", "GENDER", "ethnicity_5",
"mortality30", "hospital_death",
"rrt", "ventilation",
"hypertension", "DM", "anemia", "Af", "coronary_atherosclerosis",
"venous_thrombus", "MI", "gastritis", "gastric_ulcer", "duodenal_ulcer",
"gastric_bleeding", "aki", "septic_shock", "pneumonia",
"LVEF_ctg",
"RAA", "Diuritic", "inotrop", "anti_adrenal", "ca_blocker",
"anticoag", "antiplate", "ppi")
df_PEO_total |>
select(all_of(c(col_continuous, col_factors, "h2ras")))|>
CreateTableOne(vars = c(col_continuous, col_factors), strata = "h2ras", factorVars = col_factors) -> tableone
print(tableone, smd = T, missing = T, test = F, explain = F) |>
as.data.frame() |>
write.csv("unmatched.csv")
df_unmatched_table <- read.csv("unmatched.csv",
col.names = c("name", "non_user", "user", "smd", "missing"))
df_unmatched_table
df_PEO_total |>
select(col_continuous) |>
pivot_longer(cols = col_continuous, names_to = "name", values_to = "value") |>
ggplot()+
geom_histogram(aes(x = value), color = "black")+
facet_wrap(~ name, scales = "free", ncol = 5) +
theme_bw()+
theme(text = element_text(size = 12))
30day mortality
df_PEO_total |> select(h2ras, mortality30, hospital_death) |>
CreateTableOne(vars = c("mortality30"),
strata = "h2ras",
factorVars = c("mortality30")) -> result_01
print(result_01) |> as.data.frame() |>
rename(H2RA = '1',
Non_H2RA = '0') |>
write.csv("result_01.csv")
## Stratified by h2ras
## 0 1 p test
## n 5949 4441
## mortality30 = 1 (%) 1301 (21.9) 546 (12.3) <0.001
unadjusted
# df_PEO_total |>
# mutate(futime30 = ifelse(mortality30 == 1, deathday-admitday, 30)) |>
# # filter(futime30 < 0)
# ggplot()+
# geom_histogram(aes(x=futime30), color="black") +
# xlim(c(-5, 1)) +
# theme_bw()
df_survival <-
df_PEO_total |>
mutate(death_duration = (ADMITTIME %--% DOD)/days(1)) %>%
mutate(futime30 = ifelse(mortality30 == 1, death_duration, 31)) |>
# filter(futime30 < -1)
mutate(futime30 = ifelse(futime30 <=0, 1, futime30)) |>
select(c(col_continuous, col_factors, h2ras, futime30))
df_survival %>% select(mortality30, futime30)
formula_1 = Surv(futime30, mortality30)~ h2ras
sfit_1 = surv_fit(formula_1, data = df_survival, conf.type = "log-log")
ggsurvplot(sfit_1,
conf.int=TRUE, pval=T, pval.method = T,
risk.table=TRUE,
break.time.by = 5,
break.y.by = 0.2,
palette = "Set1",
xlab = "Days",
ylab = "Cum survival",
legend.labs=c("Non-H2RA", "H2RA"),
legend.title = "",
risk.table.height=0.25,
main = "before matching"
)
「欠測データ処理(著:高橋将宣・渡辺美智子)」が参考になる。詳細を知りたい人はこの本を読むと良いです。
論文未記載なので、どのようなパッケージを使ったのか、いくつのデータセットを作成したのか、代入モデルには何を使ったのか、収束診断がどうだったのか等かなり不明な点が多い。再現は難しいが、とりあえずやってみる。パラメーター調整はかなり適当。
データセット数は多ければ多いほど良いとされ、通常のデータ規模では100~200くらいに設定している論文が多くなってきているが、大規模データでは5くらいの論文も散見される。時間がかかるので以下の再現では5としている。これでも結構時間がかかる(環境にもよるが、4~5分かかる)。
本来は欠測パターンの確認などを先に行うべきだが、割愛する。
pacman::p_load(mice, miceadds, lmtest, car, VIM)
character型の変数は数値型(または順序)にしないといけないが、この辺りの詳細も論文未記載なので、ある程度適当に変数を前処理していく必要がある。
df_survival |> select_if(is.character)
df_survival_mice <-
df_survival |>
mutate(ADMISSION_TYPE = factor(ADMISSION_TYPE, levels = c("ELECTIVE", "EMERGENCY", "URGENT")),
INSURANCE = factor(INSURANCE,
levels = c("Private", "Medicare", "Government", "Self Pay")),
ethnicity_5 = factor(ethnicity_5,
levels = c("white", "hispanic", "asian", "black", "other")),
LVEF_ctg = factor(LVEF_ctg,
levels = c("normal", "mild", "moderate", "severe"))
)
df_survival_mice |> select_if(is.factor)
前述のとおりm=5にしている。
さらに、maxit =
30程度にすると20分以上かかるので、デフォルトの5にしている。
maxit=5, m=5: 4~5分
(n.core = 5にすると1分くらい)
tic()
imp1 <- mice::futuremice(data = df_survival_mice,
m = 5,
n.core = 1,
maxit = 5)
toc()
m = 200, maxit = 35 にすると12時間でも終わらない(計算上20時間くらいかかる)
通常マルチコアを有効利用していないので、並列処理すると早くなる。
スレッド数とCPU性能の掛け算なので、どの程度時間短縮になるかはPC環境次第。
maxit=5, m=5で 1分 に短縮される。
maxit=35, m=200で 182分 (corei7-9700K, 8c8t)
maxit=35, m=208で 115分 (Ryzen7-5700G, 8c16t)
(CPUの温度管理に注意)
行数確認
df_survival |> count()
df_survival |> na.omit() |> count()
complete(imp1, 1) |> na.omit() |> count()
収束診断などは今回はスキップする
(maxit=5だと収束してないと思われる)
詳しく知りたい人は上述した教科書を読むこと。
plot(imp1, layout = c(2, 1))[c(1,2), 1]
まずはフルモデル(51変数)を作成し、各補完データセット(ここでは5個)におけるVIFを算出する。
次に5つの平均をとる。一般的にVIFは5~10以上で問題になる。
cols_cph_full <-
c("h2ras",
"age", "BMI",
"heartrate",
"sysbp",
"diasbp",
"meanbp", "resprate", "tempc", "spo2",
"urine_out","SOFA", "apsiii",
"Calcium", "Cre", "Glucose", "Mg", "Sodium", "BUN", "Plate", "RBC", "WBC",
"ADMISSION_TYPE", "INSURANCE", "GENDER", "ethnicity_5",
"rrt", "ventilation",
"hypertension", "DM", "anemia", "Af", "coronary_atherosclerosis",
"venous_thrombus", "MI", "gastritis", "gastric_ulcer", "duodenal_ulcer",
"gastric_bleeding", "aki", "septic_shock", "pneumonia",
"LVEF_ctg",
"RAA", "Diuritic", "inotrop", "anti_adrenal", "ca_blocker",
"anticoag", "antiplate", "ppi")
formula_full <-
cols_cph_full |>
str_c(collapse = " + ") %>%
str_c("Surv(futime30, mortality30) ~ ", .) %>%
formula()
formula_full
## Surv(futime30, mortality30) ~ h2ras + age + BMI + heartrate +
## sysbp + diasbp + meanbp + resprate + tempc + spo2 + urine_out +
## SOFA + apsiii + Calcium + Cre + Glucose + Mg + Sodium + BUN +
## Plate + RBC + WBC + ADMISSION_TYPE + INSURANCE + GENDER +
## ethnicity_5 + rrt + ventilation + hypertension + DM + anemia +
## Af + coronary_atherosclerosis + venous_thrombus + MI + gastritis +
## gastric_ulcer + duodenal_ulcer + gastric_bleeding + aki +
## septic_shock + pneumonia + LVEF_ctg + RAA + Diuritic + inotrop +
## anti_adrenal + ca_blocker + anticoag + antiplate + ppi
## <environment: 0x00000236d52b4900>
問題になりそうな変数は平均血圧(収縮期、拡張期と相関していることが予想される)だけだが、論文では血圧はすべて除外されていた。
M <- 5
VIF <- matrix(NA, nrow = M, ncol = 51) |>
as.data.frame() |>
setNames(cols_cph_full)
for (i in 1:M) {
data <- complete(imp1, i)
cph_full <- coxph(formula_full, data = data)
VIF[i,] <- car::vif(cph_full) |> unlist() |> as.vector()
}
vif_res <-
map(VIF, mean) |>
unlist() |>
as.data.frame() |>
setNames("vif")
vif_res
model_1_imp <-
with(data=imp1,
exp=coxph(Surv(futime30, mortality30)~ h2ras))
summary(pool(model_1_imp),conf.int=T) |>
as.data.frame() |>
mutate(HR = exp(estimate),
lower = exp(estimate - 1.96*std.error),
upper = exp(estimate + 1.96*std.error)) |>
select(term, HR, lower, upper, p.value) |>
mutate_if(is.numeric, round, 3) |>
write.csv("model_1.csv")
model_1 <- read.csv("model_1.csv")
model_1
formula
cols_cph_2 <-
c("h2ras",
"age", "BMI", "GENDER",
"ADMISSION_TYPE", "INSURANCE", "ethnicity_5"
)
formula_2 <-
cols_cph_2 |>
str_c(collapse = " + ") %>%
str_c("Surv(futime30, mortality30) ~ ", .) %>%
formula()
formula_2
## Surv(futime30, mortality30) ~ h2ras + age + BMI + GENDER + ADMISSION_TYPE +
## INSURANCE + ethnicity_5
## <environment: 0x00000236c1055040>
model_2_imp <-
with(data=imp1,
exp=coxph(Surv(futime30, mortality30) ~ h2ras + age + BMI +
GENDER + ADMISSION_TYPE + INSURANCE + ethnicity_5))
summary(pool(model_2_imp),conf.int=T) |>
as.data.frame() |>
mutate(HR = exp(estimate),
lower = exp(estimate - 1.96*std.error),
upper = exp(estimate + 1.96*std.error)) |>
select(term, HR, lower, upper, p.value) |>
mutate_if(is.numeric, round, 3) |>
write.csv("model_2.csv")
model_2 <- read.csv("model_2.csv")
model_2
formula
cols_cph_3 <-
c("h2ras",
"age", "BMI", "GENDER",
"ADMISSION_TYPE", "INSURANCE", "ethnicity_5",
"resprate", "LVEF_ctg", "SOFA", "apsiii", "rrt", "ventilation",
"urine_out", "Glucose", "Calcium", "Cre", "Mg", "Sodium", "BUN",
"Plate", "RBC", "WBC",
"RAA", "Diuritic", "inotrop", "anti_adrenal", "ca_blocker",
"anticoag", "antiplate", "ppi",
"hypertension", "DM", "anemia", "Af", "coronary_atherosclerosis",
"venous_thrombus", "MI", "gastric_ulcer", "duodenal_ulcer",
"gastric_bleeding", "aki", "septic_shock", "pneumonia"
)
formula_3 <-
cols_cph_3 |>
str_c(collapse = " + ") %>%
str_c("Surv(futime30, mortality30) ~ ", .) %>%
formula()
formula_3
## Surv(futime30, mortality30) ~ h2ras + age + BMI + GENDER + ADMISSION_TYPE +
## INSURANCE + ethnicity_5 + resprate + LVEF_ctg + SOFA + apsiii +
## rrt + ventilation + urine_out + Glucose + Calcium + Cre +
## Mg + Sodium + BUN + Plate + RBC + WBC + RAA + Diuritic +
## inotrop + anti_adrenal + ca_blocker + anticoag + antiplate +
## ppi + hypertension + DM + anemia + Af + coronary_atherosclerosis +
## venous_thrombus + MI + gastric_ulcer + duodenal_ulcer + gastric_bleeding +
## aki + septic_shock + pneumonia
## <environment: 0x00000236bca12d98>
model_3_imp <-
with(data=imp1,
exp=coxph(Surv(futime30, mortality30) ~ h2ras + age + BMI + GENDER + ADMISSION_TYPE +
INSURANCE + ethnicity_5 + resprate + LVEF_ctg + SOFA + apsiii +
rrt + ventilation + urine_out + Glucose + Calcium + Cre +
Mg + Sodium + BUN + Plate + RBC + WBC + RAA + Diuritic +
inotrop + anti_adrenal + ca_blocker + anticoag + antiplate +
ppi + hypertension + DM + anemia + Af + coronary_atherosclerosis +
venous_thrombus + MI + gastric_ulcer + duodenal_ulcer + gastric_bleeding +
aki + septic_shock + pneumonia))
summary(pool(model_3_imp),conf.int=T) |>
as.data.frame() |>
mutate(HR = exp(estimate),
lower = exp(estimate - 1.96*std.error),
upper = exp(estimate + 1.96*std.error)) |>
select(term, HR, lower, upper, p.value) |>
mutate_if(is.numeric, round, 3) |>
write.csv("model_3.csv")
model_3 <- read.csv("model_3.csv")
model_3
model_1 から model_3までの結果。論文上の数値と近い。
bind_rows(model_1 |> filter(term == "h2ras"),
model_2 |> filter(term == "h2ras"),
model_3 |> filter(term == "h2ras")) |>
mutate(model = c("model_1", "model_2", "model_3"), .before=HR) |>
select(!X) |>
write_csv("unadjusted_models.csv")
(unadjusted_models <- read_csv("unadjusted_models.csv"))
「統計的因果推論の理論と実装(著:高橋将宣)」が参考になる。
本演習では細かいところは扱えないので、興味のある人は読むと良いです。
多重代入後のPS解析についても詳しく書かれている。
本論文では多重代入した1つのデータセットに対して操作している節があるが、以下では可能な限り全データセットを扱いたいので、MatchThemパッケージを利用する。
pacman::p_load(MatchThem, cobalt)
formula作成
cols <-
c(
"age", "BMI",
"heartrate", "meanbp", "resprate", "tempc", "spo2",
"urine_out","SOFA", "apsiii",
"Calcium", "Cre", "Glucose", "Mg", "Sodium", "BUN", "Plate", "RBC", "WBC",
"ADMISSION_TYPE", "INSURANCE", "GENDER", "ethnicity_5",
"rrt", "ventilation",
"hypertension", "DM", "anemia", "Af", "coronary_atherosclerosis",
"venous_thrombus", "MI", "gastritis", "gastric_ulcer", "duodenal_ulcer",
"gastric_bleeding", "aki", "septic_shock", "pneumonia",
"LVEF_ctg",
"RAA", "Diuritic", "inotrop", "anti_adrenal", "ca_blocker",
"anticoag", "antiplate", "ppi")
ps_f <-
cols %>%
str_c(collapse = " + ") %>%
str_c("h2ras ~ ", .) %>%
formula()
ps_f
## h2ras ~ age + BMI + heartrate + meanbp + resprate + tempc + spo2 +
## urine_out + SOFA + apsiii + Calcium + Cre + Glucose + Mg +
## Sodium + BUN + Plate + RBC + WBC + ADMISSION_TYPE + INSURANCE +
## GENDER + ethnicity_5 + rrt + ventilation + hypertension +
## DM + anemia + Af + coronary_atherosclerosis + venous_thrombus +
## MI + gastritis + gastric_ulcer + duodenal_ulcer + gastric_bleeding +
## aki + septic_shock + pneumonia + LVEF_ctg + RAA + Diuritic +
## inotrop + anti_adrenal + ca_blocker + anticoag + antiplate +
## ppi
## <environment: 0x00000236da33b5b0>
matching
matchtem_data <-
matchthem(
formula= ps_f,
datasets = imp1,
approach = "within",
method = "nearest",
distance = "glm",
caliper = 0.2,
ratio = 1,
replace = F)
マッチしなかった行のweightsが0になる仕様 データセット1のマッチした行だけ見るなら以下 6716例
complete(matchtem_data, action = 1) %>%
filter(weights != 0)
summary(matchtem_data)
##
## Call:
## matchthem(formula = ps_f, datasets = imp1, approach = "within",
## method = "nearest", distance = "glm", caliper = 0.2, ratio = 1,
## replace = F)
##
## Summary of Balance for All Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio
## distance 0.5577 0.3302 1.0474 1.1661
## age 71.2522 74.3456 -0.2214 0.9547
## BMI 28.9950 28.6136 0.0471 1.0429
## heartrate 87.9847 88.2153 -0.0123 0.8294
## meanbp 80.4224 80.5551 -0.0075 0.9260
## resprate 18.2793 19.9616 -0.2676 1.0209
## tempc 36.4934 36.5442 -0.0536 0.9191
## spo2 97.1628 96.4053 0.1548 0.7634
## urine_out 1834.8396 1779.0881 0.0459 0.9337
## SOFA 5.0354 4.5937 0.1464 0.9877
## apsiii 46.4871 48.6336 -0.1095 0.9099
## Calcium 8.6707 8.5672 0.1230 0.8973
## Cre 1.5948 1.8095 -0.1429 0.7755
## Glucose 153.7370 157.1977 -0.0427 0.8408
## Mg 2.0370 1.9866 0.0999 1.5487
## Sodium 138.2213 138.0284 0.0404 0.8436
## BUN 30.6264 36.6873 -0.2888 0.6568
## Plate 241.9448 245.3032 -0.0292 0.9460
## RBC 3.8752 3.8167 0.0801 0.9250
## WBC 11.5627 12.1072 -0.0380 1.3280
## ADMISSION_TYPEELECTIVE 0.1790 0.0693 0.2863 .
## ADMISSION_TYPEEMERGENCY 0.7985 0.8854 -0.2166 .
## ADMISSION_TYPEURGENT 0.0225 0.0454 -0.1541 .
## INSURANCEPrivate 0.2182 0.1703 0.1160 .
## INSURANCEMedicare 0.7656 0.8119 -0.1093 .
## INSURANCEGovernment 0.0131 0.0134 -0.0034 .
## INSURANCESelf Pay 0.0032 0.0044 -0.0217 .
## GENDER 0.5508 0.5167 0.0685 .
## ethnicity_5white 0.7417 0.7247 0.0390 .
## ethnicity_5hispanic 0.0286 0.0207 0.0475 .
## ethnicity_5asian 0.0180 0.0170 0.0078 .
## ethnicity_5black 0.0817 0.0849 -0.0115 .
## ethnicity_5other 0.1299 0.1528 -0.0680 .
## rrt 0.0311 0.0261 0.0289 .
## ventilation 0.6393 0.4038 0.4904 .
## hypertension 0.4560 0.3965 0.1193 .
## DM 0.3697 0.3585 0.0232 .
## anemia 0.3184 0.3350 -0.0357 .
## Af 0.4580 0.4265 0.0633 .
## coronary_atherosclerosis 0.4614 0.3806 0.1621 .
## venous_thrombus 0.0173 0.0188 -0.0114 .
## MI 0.1615 0.1683 -0.0185 .
## gastritis 0.0137 0.0203 -0.0567 .
## gastric_ulcer 0.0045 0.0113 -0.1009 .
## duodenal_ulcer 0.0056 0.0119 -0.0843 .
## gastric_bleeding 0.0414 0.0864 -0.2256 .
## aki 0.3173 0.3799 -0.1346 .
## septic_shock 0.0588 0.0696 -0.0460 .
## pneumonia 0.2087 0.2258 -0.0419 .
## LVEF_ctgnormal 0.4449 0.4209 0.0484 .
## LVEF_ctgmild 0.1576 0.1479 0.0266 .
## LVEF_ctgmoderate 0.1799 0.1827 -0.0073 .
## LVEF_ctgsevere 0.2175 0.2484 -0.0750 .
## RAA 0.6129 0.4841 0.2645 .
## Diuritic 0.9194 0.7247 0.7153 .
## inotrop 0.5102 0.2992 0.4222 .
## anti_adrenal 0.9228 0.7462 0.6615 .
## ca_blocker 0.3859 0.2937 0.1896 .
## anticoag 0.9212 0.8057 0.4287 .
## antiplate 0.8246 0.6184 0.5421 .
## ppi 0.6726 0.7279 -0.1178 .
## eCDF Mean eCDF Max
## distance 0.2738 0.3939
## age 0.0703 0.1133
## BMI 0.0148 0.0341
## heartrate 0.0133 0.0490
## meanbp 0.0078 0.0265
## resprate 0.0388 0.1587
## tempc 0.0088 0.0293
## spo2 0.0123 0.1137
## urine_out 0.0169 0.0445
## SOFA 0.0206 0.0799
## apsiii 0.0159 0.0688
## Calcium 0.0108 0.0503
## Cre 0.0151 0.0980
## Glucose 0.0061 0.0324
## Mg 0.0103 0.0476
## Sodium 0.0069 0.0358
## BUN 0.0359 0.1154
## Plate 0.0077 0.0265
## RBC 0.0127 0.0371
## WBC 0.0099 0.0352
## ADMISSION_TYPEELECTIVE 0.1098 0.1098
## ADMISSION_TYPEEMERGENCY 0.0869 0.0869
## ADMISSION_TYPEURGENT 0.0229 0.0229
## INSURANCEPrivate 0.0479 0.0479
## INSURANCEMedicare 0.0463 0.0463
## INSURANCEGovernment 0.0004 0.0004
## INSURANCESelf Pay 0.0012 0.0012
## GENDER 0.0341 0.0341
## ethnicity_5white 0.0171 0.0171
## ethnicity_5hispanic 0.0079 0.0079
## ethnicity_5asian 0.0010 0.0010
## ethnicity_5black 0.0031 0.0031
## ethnicity_5other 0.0229 0.0229
## rrt 0.0050 0.0050
## ventilation 0.2355 0.2355
## hypertension 0.0594 0.0594
## DM 0.0112 0.0112
## anemia 0.0166 0.0166
## Af 0.0315 0.0315
## coronary_atherosclerosis 0.0808 0.0808
## venous_thrombus 0.0015 0.0015
## MI 0.0068 0.0068
## gastritis 0.0066 0.0066
## gastric_ulcer 0.0068 0.0068
## duodenal_ulcer 0.0063 0.0063
## gastric_bleeding 0.0450 0.0450
## aki 0.0626 0.0626
## septic_shock 0.0108 0.0108
## pneumonia 0.0170 0.0170
## LVEF_ctgnormal 0.0240 0.0240
## LVEF_ctgmild 0.0097 0.0097
## LVEF_ctgmoderate 0.0028 0.0028
## LVEF_ctgsevere 0.0309 0.0309
## RAA 0.1288 0.1288
## Diuritic 0.1947 0.1947
## inotrop 0.2110 0.2110
## anti_adrenal 0.1766 0.1766
## ca_blocker 0.0923 0.0923
## anticoag 0.1155 0.1155
## antiplate 0.2062 0.2062
## ppi 0.0553 0.0553
##
## Summary of Balance for Matched Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio
## distance 0.4807 0.4540 0.1231 1.2245
## age 72.2893 72.7264 -0.0313 0.9253
## BMI 28.9746 29.0710 -0.0119 1.0305
## heartrate 88.4648 88.6285 -0.0087 0.9033
## meanbp 81.0890 81.2897 -0.0114 1.0225
## resprate 19.2879 19.5782 -0.0462 1.1457
## tempc 36.5353 36.5476 -0.0130 0.9783
## spo2 96.7843 96.6774 0.0219 1.1939
## urine_out 1809.3511 1782.0661 0.0224 1.0678
## SOFA 4.7828 4.6896 0.0309 0.9950
## apsiii 47.5591 48.0217 -0.0236 0.9996
## Calcium 8.6485 8.6506 -0.0025 0.9787
## Cre 1.6819 1.7400 -0.0387 0.8301
## Glucose 158.0229 158.9538 -0.0115 0.8708
## Mg 2.0042 2.0015 0.0054 1.6739
## Sodium 138.0518 138.0390 0.0027 1.0660
## BUN 32.6041 33.7221 -0.0533 0.9963
## Plate 246.8558 247.5225 -0.0058 0.9858
## RBC 3.8848 3.8772 0.0104 0.9476
## WBC 11.7396 11.8951 -0.0108 1.4419
## ADMISSION_TYPEELECTIVE 0.1111 0.0995 0.0303 .
## ADMISSION_TYPEEMERGENCY 0.8636 0.8782 -0.0364 .
## ADMISSION_TYPEURGENT 0.0253 0.0223 0.0201 .
## INSURANCEPrivate 0.1987 0.1951 0.0087 .
## INSURANCEMedicare 0.7843 0.7870 -0.0063 .
## INSURANCEGovernment 0.0137 0.0146 -0.0079 .
## INSURANCESelf Pay 0.0033 0.0033 0.0000 .
## GENDER 0.5287 0.5284 0.0006 .
## ethnicity_5white 0.7376 0.7507 -0.0299 .
## ethnicity_5hispanic 0.0277 0.0256 0.0125 .
## ethnicity_5asian 0.0188 0.0173 0.0112 .
## ethnicity_5black 0.0885 0.0906 -0.0076 .
## ethnicity_5other 0.1275 0.1159 0.0346 .
## rrt 0.0319 0.0304 0.0086 .
## ventilation 0.5430 0.4972 0.0955 .
## hypertension 0.4313 0.4290 0.0048 .
## DM 0.3753 0.3810 -0.0117 .
## anemia 0.3286 0.3301 -0.0032 .
## Af 0.4436 0.4313 0.0245 .
## coronary_atherosclerosis 0.4176 0.4027 0.0299 .
## venous_thrombus 0.0191 0.0194 -0.0023 .
## MI 0.1665 0.1600 0.0178 .
## gastritis 0.0164 0.0182 -0.0154 .
## gastric_ulcer 0.0057 0.0077 -0.0311 .
## duodenal_ulcer 0.0071 0.0077 -0.0080 .
## gastric_bleeding 0.0515 0.0560 -0.0224 .
## aki 0.3595 0.3756 -0.0346 .
## septic_shock 0.0709 0.0745 -0.0152 .
## pneumonia 0.2359 0.2332 0.0066 .
## LVEF_ctgnormal 0.4459 0.4337 0.0246 .
## LVEF_ctgmild 0.1498 0.1534 -0.0098 .
## LVEF_ctgmoderate 0.1787 0.1838 -0.0132 .
## LVEF_ctgsevere 0.2255 0.2291 -0.0087 .
## RAA 0.5967 0.5979 -0.0024 .
## Diuritic 0.8987 0.8937 0.0186 .
## inotrop 0.4483 0.4179 0.0608 .
## anti_adrenal 0.9032 0.9032 0.0000 .
## ca_blocker 0.3905 0.3789 0.0239 .
## anticoag 0.9276 0.9363 -0.0321 .
## antiplate 0.7796 0.7733 0.0164 .
## ppi 0.7495 0.7682 -0.0400 .
## eCDF Mean eCDF Max Std. Pair Dist.
## distance 0.0303 0.0938 0.1232
## age 0.0151 0.0360 1.1186
## BMI 0.0060 0.0179 1.0177
## heartrate 0.0077 0.0325 1.2027
## meanbp 0.0060 0.0214 1.1170
## resprate 0.0112 0.0691 1.0473
## tempc 0.0033 0.0167 1.1015
## spo2 0.0040 0.0319 0.8335
## urine_out 0.0045 0.0140 1.0897
## SOFA 0.0049 0.0226 1.0765
## apsiii 0.0046 0.0223 1.0667
## Calcium 0.0024 0.0191 1.0839
## Cre 0.0037 0.0250 0.8466
## Glucose 0.0039 0.0176 0.9713
## Mg 0.0024 0.0125 0.8600
## Sodium 0.0022 0.0140 1.0853
## BUN 0.0078 0.0337 1.0677
## Plate 0.0037 0.0143 1.0530
## RBC 0.0048 0.0155 1.1237
## WBC 0.0038 0.0206 0.4882
## ADMISSION_TYPEELECTIVE 0.0116 0.0116 0.4515
## ADMISSION_TYPEEMERGENCY 0.0146 0.0146 0.5250
## ADMISSION_TYPEURGENT 0.0030 0.0030 0.3172
## INSURANCEPrivate 0.0036 0.0036 0.7688
## INSURANCEMedicare 0.0027 0.0027 0.7981
## INSURANCEGovernment 0.0009 0.0009 0.2440
## INSURANCESelf Pay 0.0000 0.0000 0.0066
## GENDER 0.0003 0.0003 1.0067
## ethnicity_5white 0.0131 0.0131 0.8657
## ethnicity_5hispanic 0.0021 0.0021 0.3056
## ethnicity_5asian 0.0015 0.0015 0.2710
## ethnicity_5black 0.0021 0.0021 0.6035
## ethnicity_5other 0.0116 0.0116 0.6282
## rrt 0.0015 0.0015 0.3382
## ventilation 0.0459 0.0459 0.8660
## hypertension 0.0024 0.0024 0.9892
## DM 0.0057 0.0057 0.9546
## anemia 0.0015 0.0015 0.9508
## Af 0.0122 0.0122 0.9692
## coronary_atherosclerosis 0.0149 0.0149 0.9417
## venous_thrombus 0.0003 0.0003 0.2898
## MI 0.0066 0.0066 0.7319
## gastritis 0.0018 0.0018 0.2918
## gastric_ulcer 0.0021 0.0021 0.1913
## duodenal_ulcer 0.0006 0.0006 0.1991
## gastric_bleeding 0.0045 0.0045 0.4978
## aki 0.0161 0.0161 0.9793
## septic_shock 0.0036 0.0036 0.5801
## pneumonia 0.0027 0.0027 0.8759
## LVEF_ctgnormal 0.0122 0.0122 0.9968
## LVEF_ctgmild 0.0036 0.0036 0.6998
## LVEF_ctgmoderate 0.0051 0.0051 0.7623
## LVEF_ctgsevere 0.0036 0.0036 0.8390
## RAA 0.0012 0.0012 0.9345
## Diuritic 0.0051 0.0051 0.5504
## inotrop 0.0304 0.0304 0.8509
## anti_adrenal 0.0000 0.0000 0.1436
## ca_blocker 0.0116 0.0116 0.9344
## anticoag 0.0086 0.0086 0.4522
## antiplate 0.0063 0.0063 0.8138
## ppi 0.0188 0.0188 0.7586
##
## Sample Sizes:
## Control Treated
## All 5949 4441
## Matched 3357 3357
## Unmatched 2592 1084
## Discarded 0 0
bal.tab(matchtem_data)
## Balance summary across all imputations
## Type Min.Diff.Adj Mean.Diff.Adj Max.Diff.Adj
## distance Distance 0.1218 0.1239 0.1260
## age Contin. -0.0516 -0.0377 -0.0298
## BMI Contin. -0.0119 -0.0015 0.0050
## heartrate Contin. -0.0215 -0.0119 -0.0025
## meanbp Contin. -0.0126 -0.0076 0.0022
## resprate Contin. -0.0581 -0.0500 -0.0362
## tempc Contin. -0.0130 -0.0040 0.0087
## spo2 Contin. -0.0006 0.0202 0.0374
## urine_out Contin. -0.0006 0.0078 0.0224
## SOFA Contin. 0.0059 0.0320 0.0439
## apsiii Contin. -0.0491 -0.0200 -0.0062
## Calcium Contin. -0.0050 0.0041 0.0127
## Cre Contin. -0.0387 -0.0183 -0.0057
## Glucose Contin. -0.0246 -0.0153 -0.0115
## Mg Contin. -0.0077 0.0049 0.0204
## Sodium Contin. -0.0070 0.0005 0.0084
## BUN Contin. -0.0624 -0.0465 -0.0344
## Plate Contin. -0.0092 -0.0038 -0.0002
## RBC Contin. 0.0076 0.0140 0.0222
## WBC Contin. -0.0125 -0.0115 -0.0103
## ADMISSION_TYPE_ELECTIVE Binary 0.0089 0.0149 0.0218
## ADMISSION_TYPE_EMERGENCY Binary -0.0238 -0.0165 -0.0101
## ADMISSION_TYPE_URGENT Binary -0.0003 0.0016 0.0030
## INSURANCE_Private Binary 0.0036 0.0073 0.0113
## INSURANCE_Medicare Binary -0.0110 -0.0077 -0.0027
## INSURANCE_Government Binary -0.0012 0.0000 0.0015
## INSURANCE_Self Pay Binary 0.0000 0.0005 0.0009
## GENDER Binary 0.0003 0.0042 0.0072
## ethnicity_5_white Binary -0.0131 -0.0091 -0.0036
## ethnicity_5_hispanic Binary 0.0009 0.0027 0.0039
## ethnicity_5_asian Binary -0.0018 0.0005 0.0015
## ethnicity_5_black Binary -0.0024 -0.0009 0.0024
## ethnicity_5_other Binary 0.0039 0.0068 0.0116
## rrt Binary -0.0003 0.0017 0.0051
## ventilation Binary 0.0398 0.0418 0.0459
## hypertension Binary 0.0024 0.0078 0.0143
## DM Binary -0.0057 -0.0000 0.0050
## anemia Binary -0.0080 -0.0024 0.0048
## Af Binary 0.0015 0.0080 0.0128
## coronary_atherosclerosis Binary 0.0024 0.0122 0.0208
## venous_thrombus Binary -0.0015 -0.0007 0.0003
## MI Binary -0.0006 0.0036 0.0071
## gastritis Binary -0.0024 -0.0018 -0.0012
## gastric_ulcer Binary -0.0021 -0.0015 -0.0012
## duodenal_ulcer Binary -0.0006 -0.0002 0.0003
## gastric_bleeding Binary -0.0075 -0.0048 -0.0033
## aki Binary -0.0200 -0.0135 -0.0030
## septic_shock Binary -0.0065 -0.0042 -0.0021
## pneumonia Binary -0.0072 -0.0023 0.0027
## LVEF_ctg_normal Binary -0.0030 0.0018 0.0122
## LVEF_ctg_mild Binary -0.0036 0.0033 0.0095
## LVEF_ctg_moderate Binary -0.0077 -0.0047 0.0018
## LVEF_ctg_severe Binary -0.0048 -0.0005 0.0054
## RAA Binary -0.0012 0.0054 0.0137
## Diuritic Binary 0.0036 0.0051 0.0074
## inotrop Binary 0.0279 0.0316 0.0349
## anti_adrenal Binary 0.0000 0.0048 0.0098
## ca_blocker Binary 0.0024 0.0076 0.0128
## anticoag Binary -0.0086 -0.0066 -0.0027
## antiplate Binary 0.0063 0.0146 0.0197
## ppi Binary -0.0188 -0.0116 -0.0050
##
## Average sample sizes across imputations
## 0 1
## All 5949 4441
## Matched 3358 3358
## Unmatched 2591 1083
複数データセットなので、エラーバー付きになる。
love.plot(matchtem_data,
drop.distance = TRUE,
var.order = "unadjusted",
thresholds = 0.1,
abs = TRUE,
position = "top",
shapes = c("circle", "triangle"),
colors = c("red", "blue"),
size = 2)
各データセットでのバランス確認。
bal.plot(matchtem_data,
which = "both",
type = "histogram", bins=30,
mirror = TRUE) +
labs(title = "Distributional balance for Propensity score",
x = "Propensity score")
5つのデータセットがあるので、以下はそのうち1つを取り出したもの。
complete(matchtem_data, action = 1) %>%
filter(weights != 0) %>%
select(h2ras, mortality30, hospital_death) |>
CreateTableOne(vars = c("mortality30"),
strata = "h2ras",
factorVars = c("mortality30")) -> result_matched
print(result_matched) |> as.data.frame() |>
rename(H2RA = '1',
Non_H2RA = '0') |>
write.csv("result_matched.csv")
## Stratified by h2ras
## 0 1 p test
## n 3357 3357
## mortality30 = 1 (%) 638 (19.0) 487 (14.5) <0.001
5つのデータセットがあるので、以下はそのうち1つを取り出したもの。
(重ね合わせも可能だが、可読性は落ちる)
data
df_survival_matched <-
complete(matchtem_data, action = 1) %>%
mutate(futime30 = ifelse(futime30 <=0, 1, futime30)) %>%
filter(weights != 0)
formula_1 = Surv(futime30, mortality30)~ h2ras
sfit_m1 = surv_fit(formula_1, data = df_survival_matched, conf.type = "log-log")
# svglite(filename = "KM_1.svg")
ggsurvplot(sfit_m1,
conf.int=TRUE, pval=T, pval.method = T,
risk.table=TRUE,
break.time.by = 5,
break.y.by = 0.2,
palette = "Set1",
xlab = "Days",
ylab = "Cum survival",
legend.labs=c("Non-H2AR", "H2AR"),
legend.title = "",
risk.table.height=0.25,
main = "before matching"
)
# dev.off()
ds_list <- complete(matchtem_data, action = "all") |>
lapply(\(d) d |>
mutate(futime30 = ifelse(futime30 <= 0, 1, futime30)) |>
filter(weights > 0))
formula
formula_1 = Surv(futime30, mortality30)~ h2ras
fits_1 <- map(ds_list, ~ {
des <- svydesign(ids = ~subclass, weights = ~weights, data = .x)
svycoxph(formula_1, design = des)
})
pooled_1 <- miceadds::pool_mi(
qhat = lapply(fits_1, coef),
u = lapply(fits_1, vcov)
)
sum_p_1 <- summary(pooled_1) %>%
as.data.frame() |>
rownames_to_column(var = "term") %>%
setNames(c("term", "results", "se", "t", "p", "lower", "upper", "missInfo"))
## Multiple imputation results:
## Call: miceadds::pool_mi(qhat = lapply(fits_1, coef), u = lapply(fits_1,
## vcov))
## results se t p (lower upper)
## h2ras -0.3049414 0.06233841 -4.891709 1.333815e-06 -0.4274066 -0.1824762
## missInfo
## h2ras 9.1 %
sum_p_1
res_model_m1 <- sum_p_1 |>
filter(term == "h2ras") |>
transmute(
term = term,
HR= exp(results),
low = exp(lower),
uppe = exp(upper),
P = p
) |>
mutate(across(where(is.numeric), ~ round(.x, 3)))
res_model_m1
formula
cols_cph_2 <-
c("h2ras",
"age", "BMI", "GENDER",
"ADMISSION_TYPE", "INSURANCE", "ethnicity_5"
)
formula_2 <-
cols_cph_2 |>
str_c(collapse = " + ") %>%
str_c("Surv(futime30, mortality30) ~ ", .) %>%
formula()
formula_2
## Surv(futime30, mortality30) ~ h2ras + age + BMI + GENDER + ADMISSION_TYPE +
## INSURANCE + ethnicity_5
## <environment: 0x00000236e6570118>
fits_2 <- map(ds_list, ~ {
des <- svydesign(ids = ~subclass, weights = ~weights, data = .x)
svycoxph(formula_2, design = des)
})
pooled_2 <- miceadds::pool_mi(
qhat = lapply(fits_2, coef),
u = lapply(fits_2, vcov)
)
sum_p_2 <- summary(pooled_2) %>%
as.data.frame() |>
rownames_to_column(var = "term") %>%
setNames(c("term", "results", "se", "t", "p", "lower", "upper", "missInfo"))
## Multiple imputation results:
## Call: miceadds::pool_mi(qhat = lapply(fits_2, coef), u = lapply(fits_2,
## vcov))
## results se t p
## h2ras -0.283124008 0.063237343 -4.4771648 9.843682e-06
## age 0.025633481 0.003280719 7.8133718 9.766971e-13
## BMI -0.007435977 0.005924816 -1.2550563 2.119187e-01
## GENDER 0.040546782 0.060525538 0.6699120 5.029227e-01
## ADMISSION_TYPEEMERGENCY 0.999579259 0.157814924 6.3338703 7.960665e-10
## ADMISSION_TYPEURGENT 0.460837648 0.310617298 1.4836188 1.397267e-01
## INSURANCEMedicare 0.027259325 0.102982196 0.2646994 7.913457e-01
## INSURANCEGovernment -0.190613768 0.351662098 -0.5420367 5.878186e-01
## INSURANCESelf Pay 0.778602740 0.448787046 1.7349047 8.329455e-02
## ethnicity_5hispanic -0.170094503 0.242025796 -0.7027949 4.824413e-01
## ethnicity_5asian 0.194651035 0.222127316 0.8763039 3.818535e-01
## ethnicity_5black -0.191952000 0.126421463 -1.5183498 1.291568e-01
## ethnicity_5other 0.339431463 0.096142185 3.5305154 7.313692e-04
## (lower upper) missInfo
## h2ras -0.40743780 -0.158810221 10.4 %
## age 0.01915001 0.032116949 17.6 %
## BMI -0.01916777 0.004295811 19.7 %
## GENDER -0.07808921 0.159182772 1.5 %
## ADMISSION_TYPEEMERGENCY 0.68911094 1.310047576 11.6 %
## ADMISSION_TYPEURGENT -0.15224275 1.073918049 16.2 %
## INSURANCEMedicare -0.17505175 0.229570395 9.1 %
## INSURANCEGovernment -0.88003414 0.498806600 2.9 %
## INSURANCESelf Pay -0.10286263 1.660068110 8.7 %
## ethnicity_5hispanic -0.64536289 0.305173886 8.2 %
## ethnicity_5asian -0.24320194 0.632504006 14.5 %
## ethnicity_5black -0.43995247 0.056048468 5.5 %
## ethnicity_5other 0.14774194 0.531120985 25.7 %
sum_p_2
res_model_m2 <- sum_p_2 |>
filter(term == "h2ras") |>
transmute(
term = term,
HR= exp(results),
low = exp(lower),
uppe = exp(upper),
P = p
) |>
mutate(across(where(is.numeric), ~ round(.x, 3)))
res_model_m2
formula
cols_cph_3 <-
c("h2ras",
"age", "BMI", "GENDER",
"ADMISSION_TYPE", "INSURANCE", "ethnicity_5",
"resprate", "LVEF_ctg", "SOFA", "apsiii", "rrt", "ventilation",
"urine_out", "Glucose", "Calcium", "Cre", "Mg", "Sodium", "BUN",
"Plate", "RBC", "WBC",
"RAA", "Diuritic", "inotrop", "anti_adrenal", "ca_blocker",
"anticoag", "antiplate", "ppi",
"hypertension", "DM", "anemia", "Af", "coronary_atherosclerosis",
"venous_thrombus", "MI", "gastric_ulcer", "duodenal_ulcer",
"gastric_bleeding", "aki", "septic_shock", "pneumonia"
)
formula_3 <-
cols_cph_3 |>
str_c(collapse = " + ") %>%
str_c("Surv(futime30, mortality30) ~ ", .) %>%
formula()
formula_3
## Surv(futime30, mortality30) ~ h2ras + age + BMI + GENDER + ADMISSION_TYPE +
## INSURANCE + ethnicity_5 + resprate + LVEF_ctg + SOFA + apsiii +
## rrt + ventilation + urine_out + Glucose + Calcium + Cre +
## Mg + Sodium + BUN + Plate + RBC + WBC + RAA + Diuritic +
## inotrop + anti_adrenal + ca_blocker + anticoag + antiplate +
## ppi + hypertension + DM + anemia + Af + coronary_atherosclerosis +
## venous_thrombus + MI + gastric_ulcer + duodenal_ulcer + gastric_bleeding +
## aki + septic_shock + pneumonia
## <environment: 0x00000236d8a51200>
fits_3 <- map(ds_list, ~ {
des <- svydesign(ids = ~subclass, weights = ~weights, data = .x)
svycoxph(formula_3, design = des)
})
pooled_3 <- miceadds::pool_mi(
qhat = lapply(fits_3, coef),
u = lapply(fits_3, vcov)
)
sum_p_3 <- summary(pooled_3) %>%
as.data.frame() |>
rownames_to_column(var = "term") %>%
setNames(c("term", "results", "se", "t", "p", "lower", "upper", "missInfo"))
## Multiple imputation results:
## Call: miceadds::pool_mi(qhat = lapply(fits_3, coef), u = lapply(fits_3,
## vcov))
## results se t p
## h2ras -0.3003429926 6.317288e-02 -4.754302809 2.003308e-06
## age 0.0257879424 3.456598e-03 7.460497168 5.322777e-11
## BMI -0.0067265972 5.659923e-03 -1.188460828 2.353167e-01
## GENDER 0.0366686365 6.816826e-02 0.537913610 5.906844e-01
## ADMISSION_TYPEEMERGENCY 0.8380961858 1.621058e-01 5.170056936 2.809121e-07
## ADMISSION_TYPEURGENT 0.1729905144 3.178751e-01 0.544209058 5.867858e-01
## INSURANCEMedicare 0.0064151098 1.056852e-01 0.060700155 9.516182e-01
## INSURANCEGovernment -0.0415860541 3.336724e-01 -0.124631369 9.008715e-01
## INSURANCESelf Pay 0.5953384303 4.727181e-01 1.259394061 2.096908e-01
## ethnicity_5hispanic 0.0285354895 2.337413e-01 0.122081529 9.028585e-01
## ethnicity_5asian 0.0123329811 2.575804e-01 0.047880125 9.618734e-01
## ethnicity_5black 0.0397854046 1.342952e-01 0.296253473 7.671304e-01
## ethnicity_5other 0.2287355679 9.268369e-02 2.467915977 1.387093e-02
## resprate 0.0257747132 5.433418e-03 4.743738154 7.493703e-06
## LVEF_ctgmild 0.1305191581 1.041575e-01 1.253094684 2.109343e-01
## LVEF_ctgmoderate 0.2141326965 1.049597e-01 2.040142275 4.363705e-02
## LVEF_ctgsevere 0.3353850399 9.203983e-02 3.643911987 3.126798e-04
## SOFA 0.0087224757 1.888383e-02 0.461901925 6.460574e-01
## apsiii 0.0173441565 2.475088e-03 7.007490453 5.514558e-11
## rrt 0.2055786189 1.628938e-01 1.262040655 2.159397e-01
## ventilation 0.4685733424 8.636046e-02 5.425785906 2.706866e-07
## urine_out -0.0002412922 4.554674e-05 -5.297682566 2.138933e-06
## Glucose -0.0001704706 4.385019e-04 -0.388756684 6.985352e-01
## Calcium 0.0461699745 4.094116e-02 1.127715228 2.615599e-01
## Cre -0.1534597297 3.839471e-02 -3.996898054 1.641221e-04
## Mg 0.0751600697 1.223636e-01 0.614235460 5.435700e-01
## Sodium -0.0152249940 6.288262e-03 -2.421176885 1.637614e-02
## BUN 0.0055851435 2.278502e-03 2.451235132 1.898237e-02
## Plate -0.0001097837 3.030557e-04 -0.362255887 7.181480e-01
## RBC -0.0245595431 5.086988e-02 -0.482791428 6.292951e-01
## WBC 0.0029857029 6.157207e-04 4.849119016 1.250780e-06
## RAA -0.6778558238 7.858796e-02 -8.625440879 2.543914e-15
## Diuritic -0.4050798136 1.314217e-01 -3.082289049 3.372456e-03
## inotrop 0.3108853031 8.690564e-02 3.577274237 4.819714e-04
## anti_adrenal -0.6176685580 9.736732e-02 -6.343694692 2.363166e-10
## ca_blocker -0.1129266334 7.071256e-02 -1.596981210 1.104847e-01
## anticoag -0.1050590358 1.777756e-01 -0.590964395 5.583847e-01
## antiplate -0.1822773821 8.431114e-02 -2.161960947 3.111729e-02
## ppi -0.1667028042 1.079652e-01 -1.544042649 1.330167e-01
## hypertension 0.0177652703 7.272498e-02 0.244280171 8.073804e-01
## DM -0.0451034166 8.077522e-02 -0.558381851 5.776060e-01
## anemia -0.4222816459 8.259874e-02 -5.112446777 1.851743e-06
## Af 0.1405803216 6.976737e-02 2.014986624 4.503669e-02
## coronary_atherosclerosis -0.1292986107 8.523907e-02 -1.516893734 1.342228e-01
## venous_thrombus 0.1362632098 2.039985e-01 0.667961976 5.054955e-01
## MI 0.1755953928 1.015523e-01 1.729113559 8.646473e-02
## gastric_ulcer -0.3694991362 4.289925e-01 -0.861318454 3.893585e-01
## duodenal_ulcer -0.0032385801 4.761333e-01 -0.006801835 9.946048e-01
## gastric_bleeding -0.1564137106 1.749317e-01 -0.894141422 3.730093e-01
## aki 0.2348448269 7.606754e-02 3.087319985 2.048207e-03
## septic_shock 0.0928369756 1.049627e-01 0.884475665 3.776743e-01
## pneumonia 0.1758907854 7.591390e-02 2.316977240 2.127354e-02
## (lower upper) missInfo
## h2ras -0.4241660396 -0.1765199457 1.3 %
## age 0.0189203330 0.0326555518 22.8 %
## BMI -0.0178515800 0.0043983856 10.1 %
## GENDER -0.0970031572 0.1703404303 4.1 %
## ADMISSION_TYPEEMERGENCY 0.5200018562 1.1561905153 6.4 %
## ADMISSION_TYPEURGENT -0.4530839798 0.7990650086 13.4 %
## INSURANCEMedicare -0.2011429048 0.2139731244 8.5 %
## INSURANCEGovernment -0.6973499869 0.6141778788 9.9 %
## INSURANCESelf Pay -0.3381117822 1.5287886428 16.7 %
## ethnicity_5hispanic -0.4301327810 0.4872037600 6.4 %
## ethnicity_5asian -0.4964863467 0.5211523089 17.1 %
## ethnicity_5black -0.2239152389 0.3034860480 8.1 %
## ethnicity_5other 0.0467079144 0.4107632214 8.5 %
## resprate 0.0149860978 0.0365633286 22.3 %
## LVEF_ctgmild -0.0742716764 0.3353099926 10.7 %
## LVEF_ctgmoderate 0.0062179866 0.4220474064 20.1 %
## LVEF_ctgsevere 0.1543130252 0.5164570547 11.7 %
## SOFA -0.0291608581 0.0466058095 30.2 %
## apsiii 0.0124580982 0.0222302149 16.4 %
## rrt -0.1260589379 0.5372161756 38.8 %
## ventilation 0.2977267304 0.6394199544 18.7 %
## urine_out -0.0003325757 -0.0001500086 29.5 %
## Glucose -0.0010437196 0.0007027785 24.8 %
## Calcium -0.0348428664 0.1271828153 19 %
## Cre -0.2301148459 -0.0768046136 26.8 %
## Mg -0.1744664747 0.3247866142 39.8 %
## Sodium -0.0276258921 -0.0028240958 15.1 %
## BUN 0.0009715045 0.0101987825 35.9 %
## Plate -0.0007131945 0.0004936271 24.7 %
## RBC -0.1243212260 0.0752021397 4.5 %
## WBC 0.0017788289 0.0041925769 1.5 %
## RAA -0.8328760052 -0.5228356424 15.4 %
## Diuritic -0.6692029232 -0.1409567040 31.4 %
## inotrop 0.1390203194 0.4827502867 18.4 %
## anti_adrenal -0.8085334291 -0.4268036869 2.2 %
## ca_blocker -0.2516348665 0.0257815996 5.3 %
## anticoag -0.4661095371 0.2559914656 37.5 %
## antiplate -0.3479441280 -0.0166106362 9.5 %
## ppi -0.3871565071 0.0537508987 40.3 %
## hypertension -0.1260470626 0.1615776031 18.3 %
## DM -0.2050043918 0.1147975586 19.4 %
## anemia -0.5864419447 -0.2581213471 23.1 %
## Af 0.0031361338 0.2780245094 13.7 %
## coronary_atherosclerosis -0.2995865955 0.0409893741 27.3 %
## venous_thrombus -0.2678161212 0.5403425408 20 %
## MI -0.0255541206 0.3767449061 20 %
## gastric_ulcer -1.2117696708 0.4727713985 7.8 %
## duodenal_ulcer -0.9638047561 0.9573275958 33.7 %
## gastric_bleeding -0.5027134531 0.1898860320 19.4 %
## aki 0.0856623671 0.3840272867 4.6 %
## septic_shock -0.1143439999 0.3000179512 16.2 %
## pneumonia 0.0264143268 0.3253672440 13 %
sum_p_3
res_model_m3 <- sum_p_3 |>
filter(term == "h2ras") |>
transmute(
term = term,
HR= exp(results),
low = exp(lower),
uppe = exp(upper),
P = p
) |>
mutate(across(where(is.numeric), ~ round(.x, 3)))
res_model_m3
3つのモデルの結果をまとめて表示
bind_rows(res_model_m1 |> filter(term == "h2ras"),
res_model_m2 |> filter(term == "h2ras"),
res_model_m3 |> filter(term == "h2ras")) |>
mutate(model = c("model_1", "model_2", "model_3"), .before=HR)
今までの解析は、単なる「PS+回帰」であって、Doubly robust
にはなっていない事に注意。
DRでは、暴露モデルとアウトカムモデルの「2つのモデルの誤差を打ち消す」形が必要。
(平均値差やリスク比の解析であればPSweight等のパッケージで実行可能)
生存時間解析におけるDRについては、drsurv
packageが提供されているがハザード比は扱えない(生存確率差などのみ。以下の論文参照)。
https://academic.oup.com/biometrics/article/80/3/ujae069/7717804
MatchItのみで統合する方法(MatchThemの結果の確認)
マッチングからcox回帰までの流れを関数化する
PS_cox = function(M, f){
m_near <- MatchIt::matchit(
formula= ps_f,
data = complete(imp1, M),
method = "nearest",
distance = "glm",
caliper = 0.2,
ratio = 1,
replace = F)
matched_data <- MatchIt::match.data(m_near)
df_survival_matched <-
matched_data |>
mutate(futime30 = ifelse(futime30 <=0, 1, futime30))
cph <- coxph(f, data = df_survival_matched)
res <- summary(cph)
TE = res$coefficients[1, 1]
seTE = res$coefficients[1, 3]
return(list(TE, seTE))
}
1個目のデータセットは上述した結果とおおむね一致する
PS_cox(1, formula_1)[1] |> unlist() |> exp()
## [1] 0.7437586
PS_cox(1, formula_2)[1] |> unlist() |> exp()
## [1] 0.7572183
PS_cox(1, formula_3)[1] |> unlist() |> exp()
## [1] 0.7356662
M個の補完データセットに対して同じ操作を繰り返し、結果を格納する。
M <- 5
df_model_1 <- data.frame(TE = rep(NA, M),
seTE = rep(NA, M))
for (i in 1:M) {
res <- PS_cox(i, formula_1)
df_model_1[i, 1] <- res[[1]]
df_model_1[i, 2] <- res[[2]]
}
M個の結果を統合する
tau1bar = mean(df_model_1$TE)
w1bar = sum(df_model_1$seTE^2)/M
b1bar = sum((df_model_1$TE - mean(df_model_1$TE))^2)/(M-1)
se1bar = sqrt(w1bar + (1 + 1/M)*b1bar)
exp(tau1bar)
## [1] 0.7371666
exp(tau1bar + qnorm(c(0.025, 0.975))*se1bar)
## [1] 0.6519570 0.8335129
上記を関数化
PS_cox_meta = function(M, f){
df_model <- data.frame(TE = rep(NA, M),
seTE = rep(NA, M))
for (i in 1:M) {
res <- PS_cox(i, f)
df_model[i, 1] <- res[[1]]
df_model[i, 2] <- res[[2]]
}
tau1bar = mean(df_model$TE)
w1bar = sum(df_model$seTE^2)/M
b1bar = sum((df_model$TE - mean(df_model$TE))^2)/(M-1)
se1bar = sqrt(w1bar + (1 + 1/M)*b1bar)
return(list(HR = exp(tau1bar),
lower = exp(tau1bar + qnorm(c(0.025))*se1bar),
upper = exp(tau1bar + qnorm(c(0.975))*se1bar)))
}
model_1で確認
M <- 5
PS_cox_meta(M, formula_1) |> unlist()
## HR lower upper
## 0.7371666 0.6519570 0.8335129
PS_cox_meta(M, formula_2) |> unlist()
## HR lower upper
## 0.7534950 0.6655657 0.8530407
PS_cox_meta(M, formula_3) |> unlist()
## HR lower upper
## 0.7409222 0.6569893 0.8355777
既に作成した式を利用する
・PS作成:ps_f
・アウトカムモデル:formula_3
・estimandはマッチングに合わせてATMとした
weighted_data <-
weightthem(
formula= ps_f,
datasets = imp1,
approach = "within",
method = "glm",
# stabilize = TRUE,
estimand = "ATM")
summary(weighted_data)
## Summary of weights
##
## - Weight ranges:
##
## Min Max
## treated 0.0027 |---------------------------| 1
## control 0.0057 |---------------------------| 1
##
## - Units with the 5 most extreme weights by group:
##
## 17 16 10 8 6
## treated 1 1 1 1 1
## 59 50 24 5 3
## control 1 1 1 1 1
##
## - Weight statistics:
##
## Coef of Var MAD Entropy # Zeros
## treated 0.489 0.446 0.141 0
## control 0.678 0.605 0.253 0
##
## - Effective Sample Sizes:
##
## Control Treated
## Unweighted 5949. 4441.
## Weighted 4075.12 3584.03
love.plot(weighted_data,
drop.distance = TRUE,
var.order = "unadjusted",
thresholds = 0.1,
abs = TRUE,
position = "top",
shapes = c("circle", "triangle"),
colors = c("red", "blue"),
size = 2)
bal.plot(weighted_data,
which = "both",
type = "histogram", bins=30,
mirror = TRUE) +
labs(title = "Distributional balance for Propensity score",
x = "Propensity score")
ds_list_w <-
complete(weighted_data, action = "all") |>
lapply(\(d) d |>
mutate(futime30 = ifelse(futime30 <= 0, 1, futime30)) |>
filter(weights > 0))
注意)Robust分散について coxph() に単に weights = weights を入れて使う場合は、 重み付き推定では robust = TRUE を付けてサンドイッチ分散にするのが基本。 (同一個人のデータが重みによりが数名分に膨れ上がるため)
以下のsvycoxph() を使う形なら、追加で robust = TRUE といった指定は不要。 svycoxph() またはsurvey パッケージは、サンプリングデザイン(weights, cluster, strata)を考慮したロバストな分散推定(サンドイッチ法)を内部で計算する設計。
fits_w <- map(ds_list_w, ~ {
des <- svydesign(
ids = ~1,
weights = ~weights,
data = .x
)
svycoxph(Surv(futime30, mortality30) ~ h2ras, design = des)
})
pooled_w <- miceadds::pool_mi(
qhat = lapply(fits_w, coef),
u = lapply(fits_w, vcov)
)
sum_w <- summary(pooled_w) %>%
as.data.frame() %>%
rownames_to_column(var = "term") %>%
setNames(c("term", "results", "se", "t", "p", "lower", "upper", "missInfo"))
## Multiple imputation results:
## Call: miceadds::pool_mi(qhat = lapply(fits_w, coef), u = lapply(fits_w,
## vcov))
## results se t p (lower upper) missInfo
## h2ras -0.259572 0.05685395 -4.565593 4.982152e-06 -0.371004 -0.14814 0.3 %
res_model_w <- sum_w |>
filter(term == "h2ras") |>
transmute(
term = term,
HR= exp(results),
low = exp(lower),
uppe = exp(upper),
P = p
) |>
mutate(across(where(is.numeric), ~ round(.x, 3)))
res_model_w