packages

pacman::p_load(tidyverse, lubridate, tableone, tictoc,
               survival, survey, survminer, car, svglite)

data

df_PEO_total <- 
  read_csv("MIMIC_HDS.csv") |> 
  mutate(GENDER = ifelse(GENDER == "M", 1, 0))
df_PEO_total
col_continuous <- 
  c("age", "height", "weight", "BMI",
    "heartrate", "sysbp", "diasbp", "meanbp", "resprate", "tempc", "spo2",
    "urine_out","SOFA", "apsiii",
    "Calcium", "Cre", "Glucose", "Mg", "Sodium", "BUN", "Plate", "RBC", "WBC")

col_factors <- 
  c("ADMISSION_TYPE", "INSURANCE", "GENDER", "ethnicity_5", 
    "mortality30", "hospital_death",
    "rrt", "ventilation",
    "hypertension", "DM", "anemia", "Af", "coronary_atherosclerosis", 
    "venous_thrombus", "MI", "gastritis", "gastric_ulcer", "duodenal_ulcer",
    "gastric_bleeding", "aki", "septic_shock", "pneumonia",
    "LVEF_ctg",
    "RAA", "Diuritic", "inotrop", "anti_adrenal", "ca_blocker",
    "anticoag", "antiplate", "ppi")

df_PEO_total |> 
  select(all_of(c(col_continuous, col_factors, "h2ras")))|>
  CreateTableOne(vars = c(col_continuous, col_factors), strata = "h2ras", factorVars = col_factors) -> tableone

print(tableone, smd = T, missing = T, test = F, explain = F) |> 
  as.data.frame() |> 
  write.csv("unmatched.csv")
df_unmatched_table <- read.csv("unmatched.csv", 
                               col.names = c("name", "non_user", "user", "smd", "missing"))
df_unmatched_table
df_PEO_total |> 
  select(col_continuous) |> 
  pivot_longer(cols = col_continuous, names_to = "name", values_to = "value") |> 
  ggplot()+
  geom_histogram(aes(x = value), color = "black")+
  facet_wrap(~ name, scales = "free", ncol = 5) +
  theme_bw()+
  theme(text = element_text(size = 12))

Analysis

30day mortality

df_PEO_total |> select(h2ras, mortality30, hospital_death) |> 
  CreateTableOne(vars = c("mortality30"),
                 strata = "h2ras", 
                 factorVars = c("mortality30")) -> result_01

print(result_01) |> as.data.frame() |> 
  rename(H2RA = '1',
         Non_H2RA = '0') |> 
  write.csv("result_01.csv")
##                      Stratified by h2ras
##                       0            1            p      test
##   n                   5949         4441                    
##   mortality30 = 1 (%) 1301 (21.9)   546 (12.3)  <0.001

KM curve: 30day mortality

unadjusted

# df_PEO_total |>
#   mutate(futime30 = ifelse(mortality30 == 1, deathday-admitday, 30)) |>
#   # filter(futime30 < 0)
#   ggplot()+
#   geom_histogram(aes(x=futime30), color="black") +
#   xlim(c(-5, 1)) +
#   theme_bw()
df_survival <- 
  df_PEO_total |> 
  mutate(death_duration = (ADMITTIME %--% DOD)/days(1)) %>% 
  mutate(futime30 = ifelse(mortality30 == 1, death_duration, 31)) |>
  # filter(futime30 < -1)
  mutate(futime30 = ifelse(futime30 <=0, 1, futime30)) |> 
  select(c(col_continuous, col_factors, h2ras, futime30))

df_survival %>% select(mortality30, futime30)
formula_1 = Surv(futime30, mortality30)~ h2ras
sfit_1 = surv_fit(formula_1, data = df_survival, conf.type = "log-log")
ggsurvplot(sfit_1,
           conf.int=TRUE, pval=T, pval.method = T,
           risk.table=TRUE,
           break.time.by = 5,
           break.y.by = 0.2,
           palette = "Set1",
           xlab = "Days",
           ylab = "Cum survival",
           legend.labs=c("Non-H2RA", "H2RA"),
           legend.title = "",
           risk.table.height=0.25,
           main = "before matching"
           )

欠測補完

Multiple imputation 多重代入法

「欠測データ処理(著:高橋将宣・渡辺美智子)」が参考になる。詳細を知りたい人はこの本を読むと良いです。

論文未記載なので、どのようなパッケージを使ったのか、いくつのデータセットを作成したのか、代入モデルには何を使ったのか、収束診断がどうだったのか等かなり不明な点が多い。再現は難しいが、とりあえずやってみる。パラメーター調整はかなり適当。
データセット数は多ければ多いほど良いとされ、通常のデータ規模では100~200くらいに設定している論文が多くなってきているが、大規模データでは5くらいの論文も散見される。時間がかかるので以下の再現では5としている。これでも結構時間がかかる(環境にもよるが、4~5分かかる)。
本来は欠測パターンの確認などを先に行うべきだが、割愛する。

pacman::p_load(mice, miceadds, lmtest, car, VIM)

character型の変数は数値型(または順序)にしないといけないが、この辺りの詳細も論文未記載なので、ある程度適当に変数を前処理していく必要がある。

df_survival |> select_if(is.character)
df_survival_mice <-
  df_survival |> 
  mutate(ADMISSION_TYPE = factor(ADMISSION_TYPE, levels = c("ELECTIVE", "EMERGENCY", "URGENT")),
         INSURANCE = factor(INSURANCE, 
                            levels = c("Private", "Medicare", "Government", "Self Pay")),
         ethnicity_5 = factor(ethnicity_5,
                              levels = c("white", "hispanic", "asian", "black", "other")),
         LVEF_ctg = factor(LVEF_ctg,
                           levels = c("normal", "mild", "moderate", "severe"))
         ) 

df_survival_mice |> select_if(is.factor) 

前述のとおりm=5にしている。
さらに、maxit = 30程度にすると20分以上かかるので、デフォルトの5にしている。
maxit=5, m=5: 4~5分
(n.core = 5にすると1分くらい)

tic()
imp1 <- mice::futuremice(data = df_survival_mice,
                         m = 5, 
                         n.core = 1,
                         maxit = 5)

toc()

m = 200, maxit = 35 にすると12時間でも終わらない(計算上20時間くらいかかる)

通常マルチコアを有効利用していないので、並列処理すると早くなる。 スレッド数とCPU性能の掛け算なので、どの程度時間短縮になるかはPC環境次第。
maxit=5, m=5で 1分 に短縮される。
maxit=35, m=200で 182分 (corei7-9700K, 8c8t)
maxit=35, m=208で 115分 (Ryzen7-5700G, 8c16t)
(CPUの温度管理に注意)

行数確認

df_survival |> count()
df_survival |> na.omit() |> count()
complete(imp1, 1) |> na.omit()  |> count()

収束診断などは今回はスキップする
(maxit=5だと収束してないと思われる)
詳しく知りたい人は上述した教科書を読むこと。

plot(imp1, layout = c(2, 1))[c(1,2), 1]

vif with multiple imputation

まずはフルモデル(51変数)を作成し、各補完データセット(ここでは5個)におけるVIFを算出する。
次に5つの平均をとる。一般的にVIFは5~10以上で問題になる。

cols_cph_full <- 
  c("h2ras", 
    "age", "BMI",
    "heartrate", 
    "sysbp",
    "diasbp",
    "meanbp", "resprate", "tempc", "spo2",
    "urine_out","SOFA", "apsiii",
    "Calcium", "Cre", "Glucose", "Mg", "Sodium", "BUN", "Plate", "RBC", "WBC",
    "ADMISSION_TYPE", "INSURANCE", "GENDER", "ethnicity_5", 
    "rrt", "ventilation",
    "hypertension", "DM", "anemia", "Af", "coronary_atherosclerosis", 
    "venous_thrombus", "MI", "gastritis", "gastric_ulcer", "duodenal_ulcer",
    "gastric_bleeding", "aki", "septic_shock", "pneumonia",
    "LVEF_ctg",
    "RAA", "Diuritic", "inotrop", "anti_adrenal", "ca_blocker",
    "anticoag", "antiplate", "ppi")

formula_full <-
cols_cph_full |> 
  str_c(collapse = " + ") %>%
  str_c("Surv(futime30, mortality30) ~ ", .) %>%
  formula()

formula_full
## Surv(futime30, mortality30) ~ h2ras + age + BMI + heartrate + 
##     sysbp + diasbp + meanbp + resprate + tempc + spo2 + urine_out + 
##     SOFA + apsiii + Calcium + Cre + Glucose + Mg + Sodium + BUN + 
##     Plate + RBC + WBC + ADMISSION_TYPE + INSURANCE + GENDER + 
##     ethnicity_5 + rrt + ventilation + hypertension + DM + anemia + 
##     Af + coronary_atherosclerosis + venous_thrombus + MI + gastritis + 
##     gastric_ulcer + duodenal_ulcer + gastric_bleeding + aki + 
##     septic_shock + pneumonia + LVEF_ctg + RAA + Diuritic + inotrop + 
##     anti_adrenal + ca_blocker + anticoag + antiplate + ppi
## <environment: 0x00000236d52b4900>

問題になりそうな変数は平均血圧(収縮期、拡張期と相関していることが予想される)だけだが、論文では血圧はすべて除外されていた。

M <- 5
VIF <- matrix(NA, nrow = M, ncol = 51) |> 
  as.data.frame() |> 
  setNames(cols_cph_full)

for (i in 1:M) {
  data <- complete(imp1, i)
  cph_full <- coxph(formula_full, data = data)
  VIF[i,] <- car::vif(cph_full) |> unlist() |> as.vector()
}

vif_res <- 
  map(VIF, mean) |> 
  unlist() |> 
  as.data.frame() |> 
  setNames("vif")
vif_res

cox regression model with multiple imputation

- model_1: unadjusted

model_1_imp <- 
  with(data=imp1, 
       exp=coxph(Surv(futime30, mortality30)~ h2ras))

summary(pool(model_1_imp),conf.int=T) |>  
  as.data.frame() |>  
  mutate(HR = exp(estimate),
         lower = exp(estimate - 1.96*std.error),
         upper = exp(estimate + 1.96*std.error)) |>  
  select(term, HR, lower, upper, p.value) |> 
  mutate_if(is.numeric, round, 3) |> 
  write.csv("model_1.csv")
  
model_1 <- read.csv("model_1.csv")
model_1

- model_2: adjusted, partial

formula

cols_cph_2 <- 
  c("h2ras", 
    "age", "BMI", "GENDER", 
    "ADMISSION_TYPE", "INSURANCE", "ethnicity_5"
    )

formula_2 <-
cols_cph_2 |> 
  str_c(collapse = " + ") %>%
  str_c("Surv(futime30, mortality30) ~ ", .) %>%
  formula()
formula_2
## Surv(futime30, mortality30) ~ h2ras + age + BMI + GENDER + ADMISSION_TYPE + 
##     INSURANCE + ethnicity_5
## <environment: 0x00000236c1055040>
model_2_imp <- 
  with(data=imp1, 
       exp=coxph(Surv(futime30, mortality30) ~ h2ras + age + BMI + 
                   GENDER + ADMISSION_TYPE + INSURANCE + ethnicity_5))

summary(pool(model_2_imp),conf.int=T) |>  
  as.data.frame() |>  
  mutate(HR = exp(estimate),
         lower = exp(estimate - 1.96*std.error),
         upper = exp(estimate + 1.96*std.error)) |>  
  select(term, HR, lower, upper, p.value) |> 
  mutate_if(is.numeric, round, 3) |> 
  write.csv("model_2.csv")
  
model_2 <- read.csv("model_2.csv")
model_2

- model_3: adjusted, full

formula

cols_cph_3 <- 
  c("h2ras", 
    "age", "BMI", "GENDER", 
    "ADMISSION_TYPE", "INSURANCE", "ethnicity_5",
    
    "resprate", "LVEF_ctg", "SOFA", "apsiii", "rrt", "ventilation",
    "urine_out", "Glucose", "Calcium", "Cre",  "Mg", "Sodium", "BUN",
    "Plate", "RBC", "WBC",
    "RAA", "Diuritic", "inotrop", "anti_adrenal", "ca_blocker",
    "anticoag", "antiplate", "ppi",
    
    "hypertension", "DM", "anemia", "Af", "coronary_atherosclerosis", 
    "venous_thrombus", "MI", "gastric_ulcer", "duodenal_ulcer",
    "gastric_bleeding", "aki", "septic_shock", "pneumonia"
    
    )


formula_3 <-
cols_cph_3 |> 
  str_c(collapse = " + ") %>%
  str_c("Surv(futime30, mortality30) ~ ", .) %>%
  formula()

formula_3
## Surv(futime30, mortality30) ~ h2ras + age + BMI + GENDER + ADMISSION_TYPE + 
##     INSURANCE + ethnicity_5 + resprate + LVEF_ctg + SOFA + apsiii + 
##     rrt + ventilation + urine_out + Glucose + Calcium + Cre + 
##     Mg + Sodium + BUN + Plate + RBC + WBC + RAA + Diuritic + 
##     inotrop + anti_adrenal + ca_blocker + anticoag + antiplate + 
##     ppi + hypertension + DM + anemia + Af + coronary_atherosclerosis + 
##     venous_thrombus + MI + gastric_ulcer + duodenal_ulcer + gastric_bleeding + 
##     aki + septic_shock + pneumonia
## <environment: 0x00000236bca12d98>
model_3_imp <- 
  with(data=imp1, 
       exp=coxph(Surv(futime30, mortality30) ~ h2ras + age + BMI + GENDER + ADMISSION_TYPE + 
                   INSURANCE + ethnicity_5 + resprate + LVEF_ctg + SOFA + apsiii + 
                   rrt + ventilation + urine_out + Glucose + Calcium + Cre + 
                   Mg + Sodium + BUN + Plate + RBC + WBC + RAA + Diuritic + 
                   inotrop + anti_adrenal + ca_blocker + anticoag + antiplate + 
                   ppi + hypertension + DM + anemia + Af + coronary_atherosclerosis + 
                   venous_thrombus + MI + gastric_ulcer + duodenal_ulcer + gastric_bleeding +
                   aki + septic_shock + pneumonia))

summary(pool(model_3_imp),conf.int=T) |>  
  as.data.frame() |>  
  mutate(HR = exp(estimate),
         lower = exp(estimate - 1.96*std.error),
         upper = exp(estimate + 1.96*std.error)) |>  
  select(term, HR, lower, upper, p.value) |> 
  mutate_if(is.numeric, round, 3) |> 
  write.csv("model_3.csv")
  
model_3 <- read.csv("model_3.csv")
model_3

model_1 から model_3までの結果。論文上の数値と近い。

bind_rows(model_1 |> filter(term == "h2ras"),
          model_2 |> filter(term == "h2ras"),
          model_3 |> filter(term == "h2ras")) |> 
  mutate(model = c("model_1", "model_2", "model_3"), .before=HR) |> 
  select(!X) |> 
  write_csv("unadjusted_models.csv")

(unadjusted_models <- read_csv("unadjusted_models.csv"))

PS matching

「統計的因果推論の理論と実装(著:高橋将宣)」が参考になる。
本演習では細かいところは扱えないので、興味のある人は読むと良いです。
多重代入後のPS解析についても詳しく書かれている。

本論文では多重代入した1つのデータセットに対して操作している節があるが、以下では可能な限り全データセットを扱いたいので、MatchThemパッケージを利用する。

pacman::p_load(MatchThem, cobalt)

formula作成

cols <- 
  c(
    "age", "BMI",
    "heartrate", "meanbp", "resprate", "tempc", "spo2",
    "urine_out","SOFA", "apsiii",
    "Calcium", "Cre", "Glucose", "Mg", "Sodium", "BUN", "Plate", "RBC", "WBC",
    "ADMISSION_TYPE", "INSURANCE", "GENDER", "ethnicity_5", 
    "rrt", "ventilation",
    "hypertension", "DM", "anemia", "Af", "coronary_atherosclerosis", 
    "venous_thrombus", "MI", "gastritis", "gastric_ulcer", "duodenal_ulcer",
    "gastric_bleeding", "aki", "septic_shock", "pneumonia",
    "LVEF_ctg",
    "RAA", "Diuritic", "inotrop", "anti_adrenal", "ca_blocker",
    "anticoag", "antiplate", "ppi")

ps_f <-
cols %>%
  str_c(collapse = " + ") %>%
  str_c("h2ras ~ ", .) %>%
  formula()
ps_f
## h2ras ~ age + BMI + heartrate + meanbp + resprate + tempc + spo2 + 
##     urine_out + SOFA + apsiii + Calcium + Cre + Glucose + Mg + 
##     Sodium + BUN + Plate + RBC + WBC + ADMISSION_TYPE + INSURANCE + 
##     GENDER + ethnicity_5 + rrt + ventilation + hypertension + 
##     DM + anemia + Af + coronary_atherosclerosis + venous_thrombus + 
##     MI + gastritis + gastric_ulcer + duodenal_ulcer + gastric_bleeding + 
##     aki + septic_shock + pneumonia + LVEF_ctg + RAA + Diuritic + 
##     inotrop + anti_adrenal + ca_blocker + anticoag + antiplate + 
##     ppi
## <environment: 0x00000236da33b5b0>

matching

matchtem_data <- 
  matchthem(
    formula= ps_f,
    datasets = imp1, 
    approach = "within",
    method = "nearest", 
    distance = "glm",
    caliper = 0.2, 
    ratio = 1, 
    replace = F)

マッチしなかった行のweightsが0になる仕様  データセット1のマッチした行だけ見るなら以下   6716例

complete(matchtem_data, action = 1) %>% 
  filter(weights != 0)

balance

summary(matchtem_data)
## 
## Call:
## matchthem(formula = ps_f, datasets = imp1, approach = "within", 
##     method = "nearest", distance = "glm", caliper = 0.2, ratio = 1, 
##     replace = F)
## 
## Summary of Balance for All Data:
##                          Means Treated Means Control Std. Mean Diff. Var. Ratio
## distance                        0.5577        0.3302          1.0474     1.1661
## age                            71.2522       74.3456         -0.2214     0.9547
## BMI                            28.9950       28.6136          0.0471     1.0429
## heartrate                      87.9847       88.2153         -0.0123     0.8294
## meanbp                         80.4224       80.5551         -0.0075     0.9260
## resprate                       18.2793       19.9616         -0.2676     1.0209
## tempc                          36.4934       36.5442         -0.0536     0.9191
## spo2                           97.1628       96.4053          0.1548     0.7634
## urine_out                    1834.8396     1779.0881          0.0459     0.9337
## SOFA                            5.0354        4.5937          0.1464     0.9877
## apsiii                         46.4871       48.6336         -0.1095     0.9099
## Calcium                         8.6707        8.5672          0.1230     0.8973
## Cre                             1.5948        1.8095         -0.1429     0.7755
## Glucose                       153.7370      157.1977         -0.0427     0.8408
## Mg                              2.0370        1.9866          0.0999     1.5487
## Sodium                        138.2213      138.0284          0.0404     0.8436
## BUN                            30.6264       36.6873         -0.2888     0.6568
## Plate                         241.9448      245.3032         -0.0292     0.9460
## RBC                             3.8752        3.8167          0.0801     0.9250
## WBC                            11.5627       12.1072         -0.0380     1.3280
## ADMISSION_TYPEELECTIVE          0.1790        0.0693          0.2863          .
## ADMISSION_TYPEEMERGENCY         0.7985        0.8854         -0.2166          .
## ADMISSION_TYPEURGENT            0.0225        0.0454         -0.1541          .
## INSURANCEPrivate                0.2182        0.1703          0.1160          .
## INSURANCEMedicare               0.7656        0.8119         -0.1093          .
## INSURANCEGovernment             0.0131        0.0134         -0.0034          .
## INSURANCESelf Pay               0.0032        0.0044         -0.0217          .
## GENDER                          0.5508        0.5167          0.0685          .
## ethnicity_5white                0.7417        0.7247          0.0390          .
## ethnicity_5hispanic             0.0286        0.0207          0.0475          .
## ethnicity_5asian                0.0180        0.0170          0.0078          .
## ethnicity_5black                0.0817        0.0849         -0.0115          .
## ethnicity_5other                0.1299        0.1528         -0.0680          .
## rrt                             0.0311        0.0261          0.0289          .
## ventilation                     0.6393        0.4038          0.4904          .
## hypertension                    0.4560        0.3965          0.1193          .
## DM                              0.3697        0.3585          0.0232          .
## anemia                          0.3184        0.3350         -0.0357          .
## Af                              0.4580        0.4265          0.0633          .
## coronary_atherosclerosis        0.4614        0.3806          0.1621          .
## venous_thrombus                 0.0173        0.0188         -0.0114          .
## MI                              0.1615        0.1683         -0.0185          .
## gastritis                       0.0137        0.0203         -0.0567          .
## gastric_ulcer                   0.0045        0.0113         -0.1009          .
## duodenal_ulcer                  0.0056        0.0119         -0.0843          .
## gastric_bleeding                0.0414        0.0864         -0.2256          .
## aki                             0.3173        0.3799         -0.1346          .
## septic_shock                    0.0588        0.0696         -0.0460          .
## pneumonia                       0.2087        0.2258         -0.0419          .
## LVEF_ctgnormal                  0.4449        0.4209          0.0484          .
## LVEF_ctgmild                    0.1576        0.1479          0.0266          .
## LVEF_ctgmoderate                0.1799        0.1827         -0.0073          .
## LVEF_ctgsevere                  0.2175        0.2484         -0.0750          .
## RAA                             0.6129        0.4841          0.2645          .
## Diuritic                        0.9194        0.7247          0.7153          .
## inotrop                         0.5102        0.2992          0.4222          .
## anti_adrenal                    0.9228        0.7462          0.6615          .
## ca_blocker                      0.3859        0.2937          0.1896          .
## anticoag                        0.9212        0.8057          0.4287          .
## antiplate                       0.8246        0.6184          0.5421          .
## ppi                             0.6726        0.7279         -0.1178          .
##                          eCDF Mean eCDF Max
## distance                    0.2738   0.3939
## age                         0.0703   0.1133
## BMI                         0.0148   0.0341
## heartrate                   0.0133   0.0490
## meanbp                      0.0078   0.0265
## resprate                    0.0388   0.1587
## tempc                       0.0088   0.0293
## spo2                        0.0123   0.1137
## urine_out                   0.0169   0.0445
## SOFA                        0.0206   0.0799
## apsiii                      0.0159   0.0688
## Calcium                     0.0108   0.0503
## Cre                         0.0151   0.0980
## Glucose                     0.0061   0.0324
## Mg                          0.0103   0.0476
## Sodium                      0.0069   0.0358
## BUN                         0.0359   0.1154
## Plate                       0.0077   0.0265
## RBC                         0.0127   0.0371
## WBC                         0.0099   0.0352
## ADMISSION_TYPEELECTIVE      0.1098   0.1098
## ADMISSION_TYPEEMERGENCY     0.0869   0.0869
## ADMISSION_TYPEURGENT        0.0229   0.0229
## INSURANCEPrivate            0.0479   0.0479
## INSURANCEMedicare           0.0463   0.0463
## INSURANCEGovernment         0.0004   0.0004
## INSURANCESelf Pay           0.0012   0.0012
## GENDER                      0.0341   0.0341
## ethnicity_5white            0.0171   0.0171
## ethnicity_5hispanic         0.0079   0.0079
## ethnicity_5asian            0.0010   0.0010
## ethnicity_5black            0.0031   0.0031
## ethnicity_5other            0.0229   0.0229
## rrt                         0.0050   0.0050
## ventilation                 0.2355   0.2355
## hypertension                0.0594   0.0594
## DM                          0.0112   0.0112
## anemia                      0.0166   0.0166
## Af                          0.0315   0.0315
## coronary_atherosclerosis    0.0808   0.0808
## venous_thrombus             0.0015   0.0015
## MI                          0.0068   0.0068
## gastritis                   0.0066   0.0066
## gastric_ulcer               0.0068   0.0068
## duodenal_ulcer              0.0063   0.0063
## gastric_bleeding            0.0450   0.0450
## aki                         0.0626   0.0626
## septic_shock                0.0108   0.0108
## pneumonia                   0.0170   0.0170
## LVEF_ctgnormal              0.0240   0.0240
## LVEF_ctgmild                0.0097   0.0097
## LVEF_ctgmoderate            0.0028   0.0028
## LVEF_ctgsevere              0.0309   0.0309
## RAA                         0.1288   0.1288
## Diuritic                    0.1947   0.1947
## inotrop                     0.2110   0.2110
## anti_adrenal                0.1766   0.1766
## ca_blocker                  0.0923   0.0923
## anticoag                    0.1155   0.1155
## antiplate                   0.2062   0.2062
## ppi                         0.0553   0.0553
## 
## Summary of Balance for Matched Data:
##                          Means Treated Means Control Std. Mean Diff. Var. Ratio
## distance                        0.4807        0.4540          0.1231     1.2245
## age                            72.2893       72.7264         -0.0313     0.9253
## BMI                            28.9746       29.0710         -0.0119     1.0305
## heartrate                      88.4648       88.6285         -0.0087     0.9033
## meanbp                         81.0890       81.2897         -0.0114     1.0225
## resprate                       19.2879       19.5782         -0.0462     1.1457
## tempc                          36.5353       36.5476         -0.0130     0.9783
## spo2                           96.7843       96.6774          0.0219     1.1939
## urine_out                    1809.3511     1782.0661          0.0224     1.0678
## SOFA                            4.7828        4.6896          0.0309     0.9950
## apsiii                         47.5591       48.0217         -0.0236     0.9996
## Calcium                         8.6485        8.6506         -0.0025     0.9787
## Cre                             1.6819        1.7400         -0.0387     0.8301
## Glucose                       158.0229      158.9538         -0.0115     0.8708
## Mg                              2.0042        2.0015          0.0054     1.6739
## Sodium                        138.0518      138.0390          0.0027     1.0660
## BUN                            32.6041       33.7221         -0.0533     0.9963
## Plate                         246.8558      247.5225         -0.0058     0.9858
## RBC                             3.8848        3.8772          0.0104     0.9476
## WBC                            11.7396       11.8951         -0.0108     1.4419
## ADMISSION_TYPEELECTIVE          0.1111        0.0995          0.0303          .
## ADMISSION_TYPEEMERGENCY         0.8636        0.8782         -0.0364          .
## ADMISSION_TYPEURGENT            0.0253        0.0223          0.0201          .
## INSURANCEPrivate                0.1987        0.1951          0.0087          .
## INSURANCEMedicare               0.7843        0.7870         -0.0063          .
## INSURANCEGovernment             0.0137        0.0146         -0.0079          .
## INSURANCESelf Pay               0.0033        0.0033          0.0000          .
## GENDER                          0.5287        0.5284          0.0006          .
## ethnicity_5white                0.7376        0.7507         -0.0299          .
## ethnicity_5hispanic             0.0277        0.0256          0.0125          .
## ethnicity_5asian                0.0188        0.0173          0.0112          .
## ethnicity_5black                0.0885        0.0906         -0.0076          .
## ethnicity_5other                0.1275        0.1159          0.0346          .
## rrt                             0.0319        0.0304          0.0086          .
## ventilation                     0.5430        0.4972          0.0955          .
## hypertension                    0.4313        0.4290          0.0048          .
## DM                              0.3753        0.3810         -0.0117          .
## anemia                          0.3286        0.3301         -0.0032          .
## Af                              0.4436        0.4313          0.0245          .
## coronary_atherosclerosis        0.4176        0.4027          0.0299          .
## venous_thrombus                 0.0191        0.0194         -0.0023          .
## MI                              0.1665        0.1600          0.0178          .
## gastritis                       0.0164        0.0182         -0.0154          .
## gastric_ulcer                   0.0057        0.0077         -0.0311          .
## duodenal_ulcer                  0.0071        0.0077         -0.0080          .
## gastric_bleeding                0.0515        0.0560         -0.0224          .
## aki                             0.3595        0.3756         -0.0346          .
## septic_shock                    0.0709        0.0745         -0.0152          .
## pneumonia                       0.2359        0.2332          0.0066          .
## LVEF_ctgnormal                  0.4459        0.4337          0.0246          .
## LVEF_ctgmild                    0.1498        0.1534         -0.0098          .
## LVEF_ctgmoderate                0.1787        0.1838         -0.0132          .
## LVEF_ctgsevere                  0.2255        0.2291         -0.0087          .
## RAA                             0.5967        0.5979         -0.0024          .
## Diuritic                        0.8987        0.8937          0.0186          .
## inotrop                         0.4483        0.4179          0.0608          .
## anti_adrenal                    0.9032        0.9032          0.0000          .
## ca_blocker                      0.3905        0.3789          0.0239          .
## anticoag                        0.9276        0.9363         -0.0321          .
## antiplate                       0.7796        0.7733          0.0164          .
## ppi                             0.7495        0.7682         -0.0400          .
##                          eCDF Mean eCDF Max Std. Pair Dist.
## distance                    0.0303   0.0938          0.1232
## age                         0.0151   0.0360          1.1186
## BMI                         0.0060   0.0179          1.0177
## heartrate                   0.0077   0.0325          1.2027
## meanbp                      0.0060   0.0214          1.1170
## resprate                    0.0112   0.0691          1.0473
## tempc                       0.0033   0.0167          1.1015
## spo2                        0.0040   0.0319          0.8335
## urine_out                   0.0045   0.0140          1.0897
## SOFA                        0.0049   0.0226          1.0765
## apsiii                      0.0046   0.0223          1.0667
## Calcium                     0.0024   0.0191          1.0839
## Cre                         0.0037   0.0250          0.8466
## Glucose                     0.0039   0.0176          0.9713
## Mg                          0.0024   0.0125          0.8600
## Sodium                      0.0022   0.0140          1.0853
## BUN                         0.0078   0.0337          1.0677
## Plate                       0.0037   0.0143          1.0530
## RBC                         0.0048   0.0155          1.1237
## WBC                         0.0038   0.0206          0.4882
## ADMISSION_TYPEELECTIVE      0.0116   0.0116          0.4515
## ADMISSION_TYPEEMERGENCY     0.0146   0.0146          0.5250
## ADMISSION_TYPEURGENT        0.0030   0.0030          0.3172
## INSURANCEPrivate            0.0036   0.0036          0.7688
## INSURANCEMedicare           0.0027   0.0027          0.7981
## INSURANCEGovernment         0.0009   0.0009          0.2440
## INSURANCESelf Pay           0.0000   0.0000          0.0066
## GENDER                      0.0003   0.0003          1.0067
## ethnicity_5white            0.0131   0.0131          0.8657
## ethnicity_5hispanic         0.0021   0.0021          0.3056
## ethnicity_5asian            0.0015   0.0015          0.2710
## ethnicity_5black            0.0021   0.0021          0.6035
## ethnicity_5other            0.0116   0.0116          0.6282
## rrt                         0.0015   0.0015          0.3382
## ventilation                 0.0459   0.0459          0.8660
## hypertension                0.0024   0.0024          0.9892
## DM                          0.0057   0.0057          0.9546
## anemia                      0.0015   0.0015          0.9508
## Af                          0.0122   0.0122          0.9692
## coronary_atherosclerosis    0.0149   0.0149          0.9417
## venous_thrombus             0.0003   0.0003          0.2898
## MI                          0.0066   0.0066          0.7319
## gastritis                   0.0018   0.0018          0.2918
## gastric_ulcer               0.0021   0.0021          0.1913
## duodenal_ulcer              0.0006   0.0006          0.1991
## gastric_bleeding            0.0045   0.0045          0.4978
## aki                         0.0161   0.0161          0.9793
## septic_shock                0.0036   0.0036          0.5801
## pneumonia                   0.0027   0.0027          0.8759
## LVEF_ctgnormal              0.0122   0.0122          0.9968
## LVEF_ctgmild                0.0036   0.0036          0.6998
## LVEF_ctgmoderate            0.0051   0.0051          0.7623
## LVEF_ctgsevere              0.0036   0.0036          0.8390
## RAA                         0.0012   0.0012          0.9345
## Diuritic                    0.0051   0.0051          0.5504
## inotrop                     0.0304   0.0304          0.8509
## anti_adrenal                0.0000   0.0000          0.1436
## ca_blocker                  0.0116   0.0116          0.9344
## anticoag                    0.0086   0.0086          0.4522
## antiplate                   0.0063   0.0063          0.8138
## ppi                         0.0188   0.0188          0.7586
## 
## Sample Sizes:
##           Control Treated
## All          5949    4441
## Matched      3357    3357
## Unmatched    2592    1084
## Discarded       0       0
bal.tab(matchtem_data)
## Balance summary across all imputations
##                              Type Min.Diff.Adj Mean.Diff.Adj Max.Diff.Adj
## distance                 Distance       0.1218        0.1239       0.1260
## age                       Contin.      -0.0516       -0.0377      -0.0298
## BMI                       Contin.      -0.0119       -0.0015       0.0050
## heartrate                 Contin.      -0.0215       -0.0119      -0.0025
## meanbp                    Contin.      -0.0126       -0.0076       0.0022
## resprate                  Contin.      -0.0581       -0.0500      -0.0362
## tempc                     Contin.      -0.0130       -0.0040       0.0087
## spo2                      Contin.      -0.0006        0.0202       0.0374
## urine_out                 Contin.      -0.0006        0.0078       0.0224
## SOFA                      Contin.       0.0059        0.0320       0.0439
## apsiii                    Contin.      -0.0491       -0.0200      -0.0062
## Calcium                   Contin.      -0.0050        0.0041       0.0127
## Cre                       Contin.      -0.0387       -0.0183      -0.0057
## Glucose                   Contin.      -0.0246       -0.0153      -0.0115
## Mg                        Contin.      -0.0077        0.0049       0.0204
## Sodium                    Contin.      -0.0070        0.0005       0.0084
## BUN                       Contin.      -0.0624       -0.0465      -0.0344
## Plate                     Contin.      -0.0092       -0.0038      -0.0002
## RBC                       Contin.       0.0076        0.0140       0.0222
## WBC                       Contin.      -0.0125       -0.0115      -0.0103
## ADMISSION_TYPE_ELECTIVE    Binary       0.0089        0.0149       0.0218
## ADMISSION_TYPE_EMERGENCY   Binary      -0.0238       -0.0165      -0.0101
## ADMISSION_TYPE_URGENT      Binary      -0.0003        0.0016       0.0030
## INSURANCE_Private          Binary       0.0036        0.0073       0.0113
## INSURANCE_Medicare         Binary      -0.0110       -0.0077      -0.0027
## INSURANCE_Government       Binary      -0.0012        0.0000       0.0015
## INSURANCE_Self Pay         Binary       0.0000        0.0005       0.0009
## GENDER                     Binary       0.0003        0.0042       0.0072
## ethnicity_5_white          Binary      -0.0131       -0.0091      -0.0036
## ethnicity_5_hispanic       Binary       0.0009        0.0027       0.0039
## ethnicity_5_asian          Binary      -0.0018        0.0005       0.0015
## ethnicity_5_black          Binary      -0.0024       -0.0009       0.0024
## ethnicity_5_other          Binary       0.0039        0.0068       0.0116
## rrt                        Binary      -0.0003        0.0017       0.0051
## ventilation                Binary       0.0398        0.0418       0.0459
## hypertension               Binary       0.0024        0.0078       0.0143
## DM                         Binary      -0.0057       -0.0000       0.0050
## anemia                     Binary      -0.0080       -0.0024       0.0048
## Af                         Binary       0.0015        0.0080       0.0128
## coronary_atherosclerosis   Binary       0.0024        0.0122       0.0208
## venous_thrombus            Binary      -0.0015       -0.0007       0.0003
## MI                         Binary      -0.0006        0.0036       0.0071
## gastritis                  Binary      -0.0024       -0.0018      -0.0012
## gastric_ulcer              Binary      -0.0021       -0.0015      -0.0012
## duodenal_ulcer             Binary      -0.0006       -0.0002       0.0003
## gastric_bleeding           Binary      -0.0075       -0.0048      -0.0033
## aki                        Binary      -0.0200       -0.0135      -0.0030
## septic_shock               Binary      -0.0065       -0.0042      -0.0021
## pneumonia                  Binary      -0.0072       -0.0023       0.0027
## LVEF_ctg_normal            Binary      -0.0030        0.0018       0.0122
## LVEF_ctg_mild              Binary      -0.0036        0.0033       0.0095
## LVEF_ctg_moderate          Binary      -0.0077       -0.0047       0.0018
## LVEF_ctg_severe            Binary      -0.0048       -0.0005       0.0054
## RAA                        Binary      -0.0012        0.0054       0.0137
## Diuritic                   Binary       0.0036        0.0051       0.0074
## inotrop                    Binary       0.0279        0.0316       0.0349
## anti_adrenal               Binary       0.0000        0.0048       0.0098
## ca_blocker                 Binary       0.0024        0.0076       0.0128
## anticoag                   Binary      -0.0086       -0.0066      -0.0027
## antiplate                  Binary       0.0063        0.0146       0.0197
## ppi                        Binary      -0.0188       -0.0116      -0.0050
## 
## Average sample sizes across imputations
##              0    1
## All       5949 4441
## Matched   3358 3358
## Unmatched 2591 1083

- love plot  

複数データセットなので、エラーバー付きになる。  

love.plot(matchtem_data,
          drop.distance = TRUE,
          var.order = "unadjusted", 
          thresholds = 0.1,
          abs = TRUE,
          position = "top",
          shapes = c("circle", "triangle"),
          colors = c("red", "blue"),
          size = 2)

- bal.plot

各データセットでのバランス確認。

bal.plot(matchtem_data,
         which = "both",
         type = "histogram", bins=30,
         mirror = TRUE) +
  labs(title = "Distributional balance for Propensity score",
       x = "Propensity score")

Analysis after matched

5つのデータセットがあるので、以下はそのうち1つを取り出したもの。

complete(matchtem_data, action = 1) %>% 
  filter(weights != 0) %>% 
  select(h2ras, mortality30, hospital_death) |> 
  CreateTableOne(vars = c("mortality30"),
                 strata = "h2ras", 
                 factorVars = c("mortality30")) -> result_matched

print(result_matched) |> as.data.frame() |> 
  rename(H2RA = '1',
         Non_H2RA = '0') |> 
  write.csv("result_matched.csv")
##                      Stratified by h2ras
##                       0            1            p      test
##   n                   3357         3357                    
##   mortality30 = 1 (%)  638 (19.0)   487 (14.5)  <0.001

KM curve: 30day mortality  

5つのデータセットがあるので、以下はそのうち1つを取り出したもの。
(重ね合わせも可能だが、可読性は落ちる)

data

df_survival_matched <- 
  complete(matchtem_data, action = 1) %>% 
  mutate(futime30 = ifelse(futime30 <=0, 1, futime30)) %>% 
  filter(weights != 0)
formula_1 = Surv(futime30, mortality30)~ h2ras
sfit_m1 = surv_fit(formula_1, data = df_survival_matched, conf.type = "log-log")
# svglite(filename = "KM_1.svg")
ggsurvplot(sfit_m1,
           conf.int=TRUE, pval=T, pval.method = T,
           risk.table=TRUE,
           break.time.by = 5,
           break.y.by = 0.2,
           palette = "Set1",
           xlab = "Days",
           ylab = "Cum survival",
           legend.labs=c("Non-H2AR", "H2AR"),
           legend.title = "",
           risk.table.height=0.25,
           main = "before matching"
           )

# dev.off()

cox regression model

data

ds_list <- complete(matchtem_data, action = "all") |>
  lapply(\(d) d |>
           mutate(futime30 = ifelse(futime30 <= 0, 1, futime30)) |>
           filter(weights > 0))           

- model_1

formula

formula_1 = Surv(futime30, mortality30)~ h2ras
fits_1 <- map(ds_list, ~ {
  des <- svydesign(ids = ~subclass, weights = ~weights, data = .x)
  svycoxph(formula_1, design = des)
})


pooled_1 <- miceadds::pool_mi(
  qhat = lapply(fits_1, coef),  
  u    = lapply(fits_1, vcov)   
)


sum_p_1 <- summary(pooled_1) %>%  
  as.data.frame() |> 
  rownames_to_column(var = "term") %>% 
  setNames(c("term", "results", "se", "t", "p", "lower", "upper", "missInfo"))
## Multiple imputation results:
## Call: miceadds::pool_mi(qhat = lapply(fits_1, coef), u = lapply(fits_1, 
##     vcov))
##          results         se         t            p     (lower     upper)
## h2ras -0.3049414 0.06233841 -4.891709 1.333815e-06 -0.4274066 -0.1824762
##       missInfo
## h2ras    9.1 %
sum_p_1
res_model_m1 <- sum_p_1 |>
  filter(term == "h2ras") |>
  transmute(
    term = term,
    HR= exp(results),
    low = exp(lower),
    uppe = exp(upper),
    P = p
  ) |>
  mutate(across(where(is.numeric), ~ round(.x, 3)))

res_model_m1

- model_2

formula

cols_cph_2 <- 
  c("h2ras", 
    "age", "BMI", "GENDER", 
    "ADMISSION_TYPE", "INSURANCE", "ethnicity_5"
    )

formula_2 <-
cols_cph_2 |> 
  str_c(collapse = " + ") %>%
  str_c("Surv(futime30, mortality30) ~ ", .) %>%
  formula()
formula_2
## Surv(futime30, mortality30) ~ h2ras + age + BMI + GENDER + ADMISSION_TYPE + 
##     INSURANCE + ethnicity_5
## <environment: 0x00000236e6570118>
fits_2 <- map(ds_list, ~ {
  des <- svydesign(ids = ~subclass, weights = ~weights, data = .x)
  svycoxph(formula_2, design = des)
})


pooled_2 <- miceadds::pool_mi(
  qhat = lapply(fits_2, coef),   
  u    = lapply(fits_2, vcov)    
)


sum_p_2 <- summary(pooled_2) %>%  
  as.data.frame() |> 
  rownames_to_column(var = "term") %>% 
  setNames(c("term", "results", "se", "t", "p", "lower", "upper", "missInfo"))
## Multiple imputation results:
## Call: miceadds::pool_mi(qhat = lapply(fits_2, coef), u = lapply(fits_2, 
##     vcov))
##                              results          se          t            p
## h2ras                   -0.283124008 0.063237343 -4.4771648 9.843682e-06
## age                      0.025633481 0.003280719  7.8133718 9.766971e-13
## BMI                     -0.007435977 0.005924816 -1.2550563 2.119187e-01
## GENDER                   0.040546782 0.060525538  0.6699120 5.029227e-01
## ADMISSION_TYPEEMERGENCY  0.999579259 0.157814924  6.3338703 7.960665e-10
## ADMISSION_TYPEURGENT     0.460837648 0.310617298  1.4836188 1.397267e-01
## INSURANCEMedicare        0.027259325 0.102982196  0.2646994 7.913457e-01
## INSURANCEGovernment     -0.190613768 0.351662098 -0.5420367 5.878186e-01
## INSURANCESelf Pay        0.778602740 0.448787046  1.7349047 8.329455e-02
## ethnicity_5hispanic     -0.170094503 0.242025796 -0.7027949 4.824413e-01
## ethnicity_5asian         0.194651035 0.222127316  0.8763039 3.818535e-01
## ethnicity_5black        -0.191952000 0.126421463 -1.5183498 1.291568e-01
## ethnicity_5other         0.339431463 0.096142185  3.5305154 7.313692e-04
##                              (lower       upper) missInfo
## h2ras                   -0.40743780 -0.158810221   10.4 %
## age                      0.01915001  0.032116949   17.6 %
## BMI                     -0.01916777  0.004295811   19.7 %
## GENDER                  -0.07808921  0.159182772    1.5 %
## ADMISSION_TYPEEMERGENCY  0.68911094  1.310047576   11.6 %
## ADMISSION_TYPEURGENT    -0.15224275  1.073918049   16.2 %
## INSURANCEMedicare       -0.17505175  0.229570395    9.1 %
## INSURANCEGovernment     -0.88003414  0.498806600    2.9 %
## INSURANCESelf Pay       -0.10286263  1.660068110    8.7 %
## ethnicity_5hispanic     -0.64536289  0.305173886    8.2 %
## ethnicity_5asian        -0.24320194  0.632504006   14.5 %
## ethnicity_5black        -0.43995247  0.056048468    5.5 %
## ethnicity_5other         0.14774194  0.531120985   25.7 %
sum_p_2
res_model_m2 <- sum_p_2 |>
  filter(term == "h2ras") |>
  transmute(
    term = term,
    HR= exp(results),
    low = exp(lower),
    uppe = exp(upper),
    P = p
  ) |>
  mutate(across(where(is.numeric), ~ round(.x, 3)))

res_model_m2

- model_3

formula

cols_cph_3 <- 
  c("h2ras", 
    "age", "BMI", "GENDER", 
    "ADMISSION_TYPE", "INSURANCE", "ethnicity_5",
    
    "resprate", "LVEF_ctg", "SOFA", "apsiii", "rrt", "ventilation",
    "urine_out", "Glucose", "Calcium", "Cre",  "Mg", "Sodium", "BUN",
    "Plate", "RBC", "WBC",
    "RAA", "Diuritic", "inotrop", "anti_adrenal", "ca_blocker",
    "anticoag", "antiplate", "ppi",
    
    "hypertension", "DM", "anemia", "Af", "coronary_atherosclerosis", 
    "venous_thrombus", "MI", "gastric_ulcer", "duodenal_ulcer",
    "gastric_bleeding", "aki", "septic_shock", "pneumonia"
    
    )

formula_3 <-
cols_cph_3 |> 
  str_c(collapse = " + ") %>%
  str_c("Surv(futime30, mortality30) ~ ", .) %>%
  formula()
formula_3
## Surv(futime30, mortality30) ~ h2ras + age + BMI + GENDER + ADMISSION_TYPE + 
##     INSURANCE + ethnicity_5 + resprate + LVEF_ctg + SOFA + apsiii + 
##     rrt + ventilation + urine_out + Glucose + Calcium + Cre + 
##     Mg + Sodium + BUN + Plate + RBC + WBC + RAA + Diuritic + 
##     inotrop + anti_adrenal + ca_blocker + anticoag + antiplate + 
##     ppi + hypertension + DM + anemia + Af + coronary_atherosclerosis + 
##     venous_thrombus + MI + gastric_ulcer + duodenal_ulcer + gastric_bleeding + 
##     aki + septic_shock + pneumonia
## <environment: 0x00000236d8a51200>
fits_3 <- map(ds_list, ~ {
  des <- svydesign(ids = ~subclass, weights = ~weights, data = .x)
  svycoxph(formula_3, design = des)
})


pooled_3 <- miceadds::pool_mi(
  qhat = lapply(fits_3, coef),   
  u    = lapply(fits_3, vcov)    
)


sum_p_3 <- summary(pooled_3) %>%  
  as.data.frame() |> 
  rownames_to_column(var = "term") %>% 
  setNames(c("term", "results", "se", "t", "p", "lower", "upper", "missInfo"))
## Multiple imputation results:
## Call: miceadds::pool_mi(qhat = lapply(fits_3, coef), u = lapply(fits_3, 
##     vcov))
##                                results           se            t            p
## h2ras                    -0.3003429926 6.317288e-02 -4.754302809 2.003308e-06
## age                       0.0257879424 3.456598e-03  7.460497168 5.322777e-11
## BMI                      -0.0067265972 5.659923e-03 -1.188460828 2.353167e-01
## GENDER                    0.0366686365 6.816826e-02  0.537913610 5.906844e-01
## ADMISSION_TYPEEMERGENCY   0.8380961858 1.621058e-01  5.170056936 2.809121e-07
## ADMISSION_TYPEURGENT      0.1729905144 3.178751e-01  0.544209058 5.867858e-01
## INSURANCEMedicare         0.0064151098 1.056852e-01  0.060700155 9.516182e-01
## INSURANCEGovernment      -0.0415860541 3.336724e-01 -0.124631369 9.008715e-01
## INSURANCESelf Pay         0.5953384303 4.727181e-01  1.259394061 2.096908e-01
## ethnicity_5hispanic       0.0285354895 2.337413e-01  0.122081529 9.028585e-01
## ethnicity_5asian          0.0123329811 2.575804e-01  0.047880125 9.618734e-01
## ethnicity_5black          0.0397854046 1.342952e-01  0.296253473 7.671304e-01
## ethnicity_5other          0.2287355679 9.268369e-02  2.467915977 1.387093e-02
## resprate                  0.0257747132 5.433418e-03  4.743738154 7.493703e-06
## LVEF_ctgmild              0.1305191581 1.041575e-01  1.253094684 2.109343e-01
## LVEF_ctgmoderate          0.2141326965 1.049597e-01  2.040142275 4.363705e-02
## LVEF_ctgsevere            0.3353850399 9.203983e-02  3.643911987 3.126798e-04
## SOFA                      0.0087224757 1.888383e-02  0.461901925 6.460574e-01
## apsiii                    0.0173441565 2.475088e-03  7.007490453 5.514558e-11
## rrt                       0.2055786189 1.628938e-01  1.262040655 2.159397e-01
## ventilation               0.4685733424 8.636046e-02  5.425785906 2.706866e-07
## urine_out                -0.0002412922 4.554674e-05 -5.297682566 2.138933e-06
## Glucose                  -0.0001704706 4.385019e-04 -0.388756684 6.985352e-01
## Calcium                   0.0461699745 4.094116e-02  1.127715228 2.615599e-01
## Cre                      -0.1534597297 3.839471e-02 -3.996898054 1.641221e-04
## Mg                        0.0751600697 1.223636e-01  0.614235460 5.435700e-01
## Sodium                   -0.0152249940 6.288262e-03 -2.421176885 1.637614e-02
## BUN                       0.0055851435 2.278502e-03  2.451235132 1.898237e-02
## Plate                    -0.0001097837 3.030557e-04 -0.362255887 7.181480e-01
## RBC                      -0.0245595431 5.086988e-02 -0.482791428 6.292951e-01
## WBC                       0.0029857029 6.157207e-04  4.849119016 1.250780e-06
## RAA                      -0.6778558238 7.858796e-02 -8.625440879 2.543914e-15
## Diuritic                 -0.4050798136 1.314217e-01 -3.082289049 3.372456e-03
## inotrop                   0.3108853031 8.690564e-02  3.577274237 4.819714e-04
## anti_adrenal             -0.6176685580 9.736732e-02 -6.343694692 2.363166e-10
## ca_blocker               -0.1129266334 7.071256e-02 -1.596981210 1.104847e-01
## anticoag                 -0.1050590358 1.777756e-01 -0.590964395 5.583847e-01
## antiplate                -0.1822773821 8.431114e-02 -2.161960947 3.111729e-02
## ppi                      -0.1667028042 1.079652e-01 -1.544042649 1.330167e-01
## hypertension              0.0177652703 7.272498e-02  0.244280171 8.073804e-01
## DM                       -0.0451034166 8.077522e-02 -0.558381851 5.776060e-01
## anemia                   -0.4222816459 8.259874e-02 -5.112446777 1.851743e-06
## Af                        0.1405803216 6.976737e-02  2.014986624 4.503669e-02
## coronary_atherosclerosis -0.1292986107 8.523907e-02 -1.516893734 1.342228e-01
## venous_thrombus           0.1362632098 2.039985e-01  0.667961976 5.054955e-01
## MI                        0.1755953928 1.015523e-01  1.729113559 8.646473e-02
## gastric_ulcer            -0.3694991362 4.289925e-01 -0.861318454 3.893585e-01
## duodenal_ulcer           -0.0032385801 4.761333e-01 -0.006801835 9.946048e-01
## gastric_bleeding         -0.1564137106 1.749317e-01 -0.894141422 3.730093e-01
## aki                       0.2348448269 7.606754e-02  3.087319985 2.048207e-03
## septic_shock              0.0928369756 1.049627e-01  0.884475665 3.776743e-01
## pneumonia                 0.1758907854 7.591390e-02  2.316977240 2.127354e-02
##                                 (lower        upper) missInfo
## h2ras                    -0.4241660396 -0.1765199457    1.3 %
## age                       0.0189203330  0.0326555518   22.8 %
## BMI                      -0.0178515800  0.0043983856   10.1 %
## GENDER                   -0.0970031572  0.1703404303    4.1 %
## ADMISSION_TYPEEMERGENCY   0.5200018562  1.1561905153    6.4 %
## ADMISSION_TYPEURGENT     -0.4530839798  0.7990650086   13.4 %
## INSURANCEMedicare        -0.2011429048  0.2139731244    8.5 %
## INSURANCEGovernment      -0.6973499869  0.6141778788    9.9 %
## INSURANCESelf Pay        -0.3381117822  1.5287886428   16.7 %
## ethnicity_5hispanic      -0.4301327810  0.4872037600    6.4 %
## ethnicity_5asian         -0.4964863467  0.5211523089   17.1 %
## ethnicity_5black         -0.2239152389  0.3034860480    8.1 %
## ethnicity_5other          0.0467079144  0.4107632214    8.5 %
## resprate                  0.0149860978  0.0365633286   22.3 %
## LVEF_ctgmild             -0.0742716764  0.3353099926   10.7 %
## LVEF_ctgmoderate          0.0062179866  0.4220474064   20.1 %
## LVEF_ctgsevere            0.1543130252  0.5164570547   11.7 %
## SOFA                     -0.0291608581  0.0466058095   30.2 %
## apsiii                    0.0124580982  0.0222302149   16.4 %
## rrt                      -0.1260589379  0.5372161756   38.8 %
## ventilation               0.2977267304  0.6394199544   18.7 %
## urine_out                -0.0003325757 -0.0001500086   29.5 %
## Glucose                  -0.0010437196  0.0007027785   24.8 %
## Calcium                  -0.0348428664  0.1271828153     19 %
## Cre                      -0.2301148459 -0.0768046136   26.8 %
## Mg                       -0.1744664747  0.3247866142   39.8 %
## Sodium                   -0.0276258921 -0.0028240958   15.1 %
## BUN                       0.0009715045  0.0101987825   35.9 %
## Plate                    -0.0007131945  0.0004936271   24.7 %
## RBC                      -0.1243212260  0.0752021397    4.5 %
## WBC                       0.0017788289  0.0041925769    1.5 %
## RAA                      -0.8328760052 -0.5228356424   15.4 %
## Diuritic                 -0.6692029232 -0.1409567040   31.4 %
## inotrop                   0.1390203194  0.4827502867   18.4 %
## anti_adrenal             -0.8085334291 -0.4268036869    2.2 %
## ca_blocker               -0.2516348665  0.0257815996    5.3 %
## anticoag                 -0.4661095371  0.2559914656   37.5 %
## antiplate                -0.3479441280 -0.0166106362    9.5 %
## ppi                      -0.3871565071  0.0537508987   40.3 %
## hypertension             -0.1260470626  0.1615776031   18.3 %
## DM                       -0.2050043918  0.1147975586   19.4 %
## anemia                   -0.5864419447 -0.2581213471   23.1 %
## Af                        0.0031361338  0.2780245094   13.7 %
## coronary_atherosclerosis -0.2995865955  0.0409893741   27.3 %
## venous_thrombus          -0.2678161212  0.5403425408     20 %
## MI                       -0.0255541206  0.3767449061     20 %
## gastric_ulcer            -1.2117696708  0.4727713985    7.8 %
## duodenal_ulcer           -0.9638047561  0.9573275958   33.7 %
## gastric_bleeding         -0.5027134531  0.1898860320   19.4 %
## aki                       0.0856623671  0.3840272867    4.6 %
## septic_shock             -0.1143439999  0.3000179512   16.2 %
## pneumonia                 0.0264143268  0.3253672440     13 %
sum_p_3
res_model_m3 <- sum_p_3 |>
  filter(term == "h2ras") |>
  transmute(
    term = term,
    HR= exp(results),
    low = exp(lower),
    uppe = exp(upper),
    P = p
  ) |>
  mutate(across(where(is.numeric), ~ round(.x, 3)))

res_model_m3

3つのモデルの結果をまとめて表示  

bind_rows(res_model_m1 |> filter(term == "h2ras"),
          res_model_m2 |> filter(term == "h2ras"),
          res_model_m3 |> filter(term == "h2ras")) |> 
  mutate(model = c("model_1", "model_2", "model_3"), .before=HR)

注意

今までの解析は、単なる「PS+回帰」であって、Doubly robust にはなっていない事に注意。 DRでは、暴露モデルとアウトカムモデルの「2つのモデルの誤差を打ち消す」形が必要。
(平均値差やリスク比の解析であればPSweight等のパッケージで実行可能)

生存時間解析におけるDRについては、drsurv packageが提供されているがハザード比は扱えない(生存確率差などのみ。以下の論文参照)。
https://academic.oup.com/biometrics/article/80/3/ujae069/7717804

—付録—

M個のデータセット毎に結果を出して最後に統合する方法  

MatchItのみで統合する方法(MatchThemの結果の確認)

関数化

マッチングからcox回帰までの流れを関数化する

PS_cox = function(M, f){
  
 m_near <- MatchIt::matchit(
  formula= ps_f,
  data = complete(imp1, M),
  method = "nearest", 
  distance = "glm", 
  caliper = 0.2, 
  ratio = 1, 
  replace = F)

 matched_data <- MatchIt::match.data(m_near)

 df_survival_matched <- 
  matched_data |> 
  mutate(futime30 = ifelse(futime30 <=0, 1, futime30))

 cph <- coxph(f, data = df_survival_matched)
 res <- summary(cph)

 TE = res$coefficients[1, 1]
 seTE = res$coefficients[1, 3]
 return(list(TE, seTE))
}

1個目のデータセットは上述した結果とおおむね一致する

PS_cox(1, formula_1)[1] |> unlist() |> exp()
## [1] 0.7437586
PS_cox(1, formula_2)[1] |> unlist() |> exp()
## [1] 0.7572183
PS_cox(1, formula_3)[1] |> unlist() |> exp()
## [1] 0.7356662

model_1

M個の補完データセットに対して同じ操作を繰り返し、結果を格納する。

M <- 5
df_model_1 <- data.frame(TE = rep(NA, M),
                         seTE = rep(NA, M))

for (i in 1:M) {
  res <- PS_cox(i, formula_1)
  df_model_1[i, 1] <- res[[1]]
  df_model_1[i, 2] <- res[[2]]
}

M個の結果を統合する

tau1bar = mean(df_model_1$TE)
w1bar = sum(df_model_1$seTE^2)/M
b1bar = sum((df_model_1$TE - mean(df_model_1$TE))^2)/(M-1)
se1bar = sqrt(w1bar + (1 + 1/M)*b1bar)

exp(tau1bar)
## [1] 0.7371666
exp(tau1bar + qnorm(c(0.025, 0.975))*se1bar)
## [1] 0.6519570 0.8335129

上記を関数化

PS_cox_meta = function(M, f){
  df_model <- data.frame(TE = rep(NA, M),
                         seTE = rep(NA, M))

 for (i in 1:M) {
  res <- PS_cox(i, f)
  df_model[i, 1] <- res[[1]]
  df_model[i, 2] <- res[[2]]
 }


 tau1bar = mean(df_model$TE)
 w1bar = sum(df_model$seTE^2)/M
 b1bar = sum((df_model$TE - mean(df_model$TE))^2)/(M-1)
 se1bar = sqrt(w1bar + (1 + 1/M)*b1bar)

 return(list(HR = exp(tau1bar),
             lower = exp(tau1bar + qnorm(c(0.025))*se1bar),
             upper = exp(tau1bar + qnorm(c(0.975))*se1bar)))
}

model_1で確認

M <- 5
PS_cox_meta(M, formula_1) |> unlist()
##        HR     lower     upper 
## 0.7371666 0.6519570 0.8335129

model_2

PS_cox_meta(M, formula_2) |> unlist()
##        HR     lower     upper 
## 0.7534950 0.6655657 0.8530407

model_3

PS_cox_meta(M, formula_3) |> unlist()
##        HR     lower     upper 
## 0.7409222 0.6569893 0.8355777

IPTW

既に作成した式を利用する  
・PS作成:ps_f   
・アウトカムモデル:formula_3  
・estimandはマッチングに合わせてATMとした

weighting

weighted_data <- 
  weightthem(
    formula= ps_f,
    datasets = imp1, 
    approach = "within",
    method = "glm",
    # stabilize = TRUE,
    estimand = "ATM")
summary(weighted_data)
##                   Summary of weights
## 
## - Weight ranges:
## 
##            Min                               Max
## treated 0.0027 |---------------------------|   1
## control 0.0057 |---------------------------|   1
## 
## - Units with the 5 most extreme weights by group:
##                      
##          17 16 10 8 6
##  treated  1  1  1 1 1
##          59 50 24 5 3
##  control  1  1  1 1 1
## 
## - Weight statistics:
## 
##         Coef of Var   MAD Entropy # Zeros
## treated       0.489 0.446   0.141       0
## control       0.678 0.605   0.253       0
## 
## - Effective Sample Sizes:
## 
##            Control Treated
## Unweighted 5949.   4441.  
## Weighted   4075.12 3584.03

balance

love.plot(weighted_data,
          drop.distance = TRUE,
          var.order = "unadjusted", 
          thresholds = 0.1,
          abs = TRUE,
          position = "top",
          shapes = c("circle", "triangle"),
          colors = c("red", "blue"),
          size = 2)

bal.plot(weighted_data,
         which = "both",
         type = "histogram", bins=30,
         mirror = TRUE) +
  labs(title = "Distributional balance for Propensity score",
       x = "Propensity score")

analysis

ds_list_w <- 
  complete(weighted_data, action = "all") |>
  lapply(\(d) d |>
           mutate(futime30 = ifelse(futime30 <= 0, 1, futime30)) |>
           filter(weights > 0)) 

注意)Robust分散について coxph() に単に weights = weights を入れて使う場合は、 重み付き推定では robust = TRUE を付けてサンドイッチ分散にするのが基本。 (同一個人のデータが重みによりが数名分に膨れ上がるため)

以下のsvycoxph() を使う形なら、追加で robust = TRUE といった指定は不要。 svycoxph() またはsurvey パッケージは、サンプリングデザイン(weights, cluster, strata)を考慮したロバストな分散推定(サンドイッチ法)を内部で計算する設計。

fits_w <- map(ds_list_w, ~ {
  des <- svydesign(
    ids     = ~1,        
    weights = ~weights,  
    data    = .x
  )
  svycoxph(Surv(futime30, mortality30) ~ h2ras, design = des)
})


pooled_w <- miceadds::pool_mi(
  qhat = lapply(fits_w, coef),  
  u    = lapply(fits_w, vcov)   
)


sum_w <- summary(pooled_w) %>%
  as.data.frame() %>%
  rownames_to_column(var = "term") %>%
  setNames(c("term", "results", "se", "t", "p", "lower", "upper", "missInfo"))
## Multiple imputation results:
## Call: miceadds::pool_mi(qhat = lapply(fits_w, coef), u = lapply(fits_w, 
##     vcov))
##         results         se         t            p    (lower   upper) missInfo
## h2ras -0.259572 0.05685395 -4.565593 4.982152e-06 -0.371004 -0.14814    0.3 %
res_model_w <- sum_w |>
  filter(term == "h2ras") |>
  transmute(
    term = term,
    HR= exp(results),
    low = exp(lower),
    uppe = exp(upper),
    P = p
  ) |>
  mutate(across(where(is.numeric), ~ round(.x, 3)))

res_model_w

End