This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
#0 必要パッケージのDL
install.packages("tidyverse")
## The following package(s) will be installed:
## - tidyverse [2.0.0]
## These packages will be installed into "G:/マイドライブ/HDS/清水ゼミ/子宮全摘の因果推論/DPC_2024/renv/library/R-4.3/x86_64-w64-mingw32".
##
## # Installing packages --------------------------------------------------------
## - Installing tidyverse ... OK [copied from cache in 1.0s]
## Successfully installed 1 package in 1.2 seconds.
install.packages("MatchIt")
## The following package(s) will be installed:
## - MatchIt [4.5.5]
## These packages will be installed into "G:/マイドライブ/HDS/清水ゼミ/子宮全摘の因果推論/DPC_2024/renv/library/R-4.3/x86_64-w64-mingw32".
##
## # Installing packages --------------------------------------------------------
## - Installing MatchIt ... OK [copied from cache in 1.6s]
## Successfully installed 1 package in 2 seconds.
install.packages("twang")
## The following package(s) will be installed:
## - twang [2.6]
## These packages will be installed into "G:/マイドライブ/HDS/清水ゼミ/子宮全摘の因果推論/DPC_2024/renv/library/R-4.3/x86_64-w64-mingw32".
##
## # Installing packages --------------------------------------------------------
## - Installing twang ... OK [copied from cache in 1.2s]
## Successfully installed 1 package in 1.6 seconds.
install.packages("Matching")
## The following package(s) will be installed:
## - Matching [4.10-14]
## These packages will be installed into "G:/マイドライブ/HDS/清水ゼミ/子宮全摘の因果推論/DPC_2024/renv/library/R-4.3/x86_64-w64-mingw32".
##
## # Installing packages --------------------------------------------------------
## - Installing Matching ... OK [copied from cache in 1.2s]
## Successfully installed 1 package in 1.5 seconds.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(skimr)
library(tableone)
library(ggplot2)
library(dplyr)
library(MatchIt)
library(Matching)
## 要求されたパッケージ MASS をロード中です
##
## 次のパッケージを付け加えます: 'MASS'
##
## 以下のオブジェクトは 'package:dplyr' からマスクされています:
##
## select
##
## ##
## ## Matching (Version 4.10-14, Build Date: 2023-09-13)
## ## See https://www.jsekhon.com for additional documentation.
## ## Please cite software as:
## ## Jasjeet S. Sekhon. 2011. ``Multivariate and Propensity Score Matching
## ## Software with Automated Balance Optimization: The Matching package for R.''
## ## Journal of Statistical Software, 42(7): 1-52.
## ##
#1.データクリーニング
df1<-read.csv("_SS_OBbleed_FF1_2018_2019_dmy.csv", fileEncoding = "CP932")
df2<-read.csv("_SS_OBbleed_FF1_R_2018_2019_dmy.csv", fileEncoding = "CP932")
colnames(df1)
## [1] "adladm" "adldis" "admpath" "age"
## [5] "ambul" "brthw" "brthwk" "bunketu"
## [9] "dadm" "dcc1" "dcc2" "dcc3"
## [13] "dcc4" "dcin1" "dcin2" "dcin3"
## [17] "dcin4" "dislasts" "disto" "dmain"
## [21] "dres1" "dres2" "dth24h" "height"
## [25] "HospDmycd" "jcs" "jcsdis" "kacd"
## [29] "nyuseqno" "opean1" "opean2" "opean3"
## [33] "opean4" "opean5" "opek1" "opek2"
## [37] "opek3" "opek4" "opek5" "outcome"
## [41] "preg" "pregwkadm" "PtDmyCd" "readmkind"
## [45] "readmrea" "sex" "smokingidx" "urgency"
## [49] "weight" "マンハッタン距離" "ユークリッド距離" "手術1相対日"
## [53] "手術2相対日" "手術3相対日" "手術4相対日" "手術5相対日"
## [57] "特定機能" "入院期間" "年度"
colnames(df2)
## [1] "adladm" "adldis" "admpath" "age" "ambul"
## [6] "brthw" "brthwk" "bunketu" "dadm" "dcc1"
## [11] "dcc2" "dcc3" "dcc4" "dcin1" "dcin2"
## [16] "dcin3" "dcin4" "dislasts" "disto" "dmain"
## [21] "dres1" "dres2" "dth24h" "height" "HospDmycd"
## [26] "idx_nyuseqno" "jcs" "jcsdis" "kacd" "nyuseqno"
## [31] "opean1" "opean2" "opean3" "opean4" "opean5"
## [36] "opek1" "opek2" "opek3" "opek4" "opek5"
## [41] "outcome" "preg" "pregwkadm" "PtDmyCd" "readmkind"
## [46] "readmrea" "sex" "smokingidx" "urgency" "weight"
## [51] "手術1相対日" "手術2相対日" "手術3相対日" "手術4相対日" "手術5相対日"
## [56] "相対入院日" "入院期間"
length(unique(df1$PtDmyCd))
## [1] 89899
length(unique(df2$PtDmyCd))
## [1] 89899
## 2018-2019は89899人のたてもちデータ
common_columns <- intersect(colnames(df1), colnames(df2))
print(common_columns)
## [1] "adladm" "adldis" "admpath" "age" "ambul"
## [6] "brthw" "brthwk" "bunketu" "dadm" "dcc1"
## [11] "dcc2" "dcc3" "dcc4" "dcin1" "dcin2"
## [16] "dcin3" "dcin4" "dislasts" "disto" "dmain"
## [21] "dres1" "dres2" "dth24h" "height" "HospDmycd"
## [26] "jcs" "jcsdis" "kacd" "nyuseqno" "opean1"
## [31] "opean2" "opean3" "opean4" "opean5" "opek1"
## [36] "opek2" "opek3" "opek4" "opek5" "outcome"
## [41] "preg" "pregwkadm" "PtDmyCd" "readmkind" "readmrea"
## [46] "sex" "smokingidx" "urgency" "weight" "手術1相対日"
## [51] "手術2相対日" "手術3相対日" "手術4相対日" "手術5相対日" "入院期間"
df1a<- df1[c("adladm","adldis","admpath","age","ambul","brthw","brthwk","bunketu","dadm","dcc1","dcc2","dcc3","dcc4","dcin1","dcin2","dcin3","dcin4","dislasts","disto","dmain","dres1","dres2","dth24h","height","HospDmycd","jcs","jcsdis","kacd","nyuseqno","opean1","opean2","opean3","opean4","opean5","opek1","opek2","opek3","opek4","opek5","outcome","preg","pregwkadm","PtDmyCd","readmkind","readmrea","sex","smokingidx","urgency","weight","手術1相対日","手術2相対日","手術3相対日","手術4相対日","手術5相対日","入院期間")]
df2a<- df2[c("adladm","adldis","admpath","age","ambul","brthw","brthwk","bunketu","dadm","dcc1","dcc2","dcc3","dcc4","dcin1","dcin2","dcin3","dcin4","dislasts","disto","dmain","dres1","dres2","dth24h","height","HospDmycd","jcs","jcsdis","kacd","nyuseqno","opean1","opean2","opean3","opean4","opean5","opek1","opek2","opek3","opek4","opek5","outcome","preg","pregwkadm","PtDmyCd","readmkind","readmrea","sex","smokingidx","urgency","weight","手術1相対日","手術2相対日","手術3相対日","手術4相対日","手術5相対日","入院期間")]
df2018_2019<-rbind(df1a,df2a)
unique_patients <- length(unique(df2018_2019$PtDmyCd))
unique_patients
## [1] 89899
## 複数回入院した患者も全て含む。
df3<-read.csv("_SS_OBbleed_FF1_2020_2021_dmy.csv", fileEncoding = "CP932")
df4<-read.csv("_SS_OBbleed_FF1_R_2020_2021_dmy.csv", fileEncoding = "CP932")
colnames(df3)
## [1] "adladm" "adldis" "admpath" "age"
## [5] "ambul" "brthw" "brthwk" "bunketu"
## [9] "dadm" "dcc1" "dcc2" "dcc3"
## [13] "dcc4" "dcin1" "dcin2" "dcin3"
## [17] "dcin4" "dislasts" "disto" "dmain"
## [21] "dres1" "dres2" "dth24h" "height"
## [25] "HospDmycd" "jcs" "jcsdis" "kacd"
## [29] "nyuseqno" "opean1" "opean2" "opean3"
## [33] "opean4" "opean5" "opek1" "opek2"
## [37] "opek3" "opek4" "opek5" "outcome"
## [41] "preg" "pregwkadm" "PtDmyCd" "readmkind"
## [45] "readmrea" "sex" "smokingidx" "urgency"
## [49] "weight" "マンハッタン距離" "ユークリッド距離" "手術1相対日"
## [53] "手術2相対日" "手術3相対日" "手術4相対日" "手術5相対日"
## [57] "特定機能" "入院期間" "年度"
colnames(df4)
## [1] "adladm" "adldis" "admpath" "age" "ambul"
## [6] "brthw" "brthwk" "bunketu" "dadm" "dcc1"
## [11] "dcc2" "dcc3" "dcc4" "dcin1" "dcin2"
## [16] "dcin3" "dcin4" "dislasts" "disto" "dmain"
## [21] "dres1" "dres2" "dth24h" "height" "HospDmycd"
## [26] "idx_nyuseqno" "jcs" "jcsdis" "kacd" "nyuseqno"
## [31] "opean1" "opean2" "opean3" "opean4" "opean5"
## [36] "opek1" "opek2" "opek3" "opek4" "opek5"
## [41] "outcome" "preg" "pregwkadm" "PtDmyCd" "readmkind"
## [46] "readmrea" "sex" "smokingidx" "urgency" "weight"
## [51] "手術1相対日" "手術2相対日" "手術3相対日" "手術4相対日" "手術5相対日"
## [56] "相対入院日" "入院期間"
length(unique(df3$PtDmyCd))
## [1] 86699
length(unique(df4$PtDmyCd))
## [1] 86699
## 2020-2021は86699人のたてもちデータ
common_columns2 <- intersect(colnames(df3), colnames(df4))
print(common_columns2)
## [1] "adladm" "adldis" "admpath" "age" "ambul"
## [6] "brthw" "brthwk" "bunketu" "dadm" "dcc1"
## [11] "dcc2" "dcc3" "dcc4" "dcin1" "dcin2"
## [16] "dcin3" "dcin4" "dislasts" "disto" "dmain"
## [21] "dres1" "dres2" "dth24h" "height" "HospDmycd"
## [26] "jcs" "jcsdis" "kacd" "nyuseqno" "opean1"
## [31] "opean2" "opean3" "opean4" "opean5" "opek1"
## [36] "opek2" "opek3" "opek4" "opek5" "outcome"
## [41] "preg" "pregwkadm" "PtDmyCd" "readmkind" "readmrea"
## [46] "sex" "smokingidx" "urgency" "weight" "手術1相対日"
## [51] "手術2相対日" "手術3相対日" "手術4相対日" "手術5相対日" "入院期間"
df3a<- df3[c("adladm","adldis","admpath","age","ambul","brthw","brthwk","bunketu","dadm","dcc1","dcc2","dcc3","dcc4","dcin1","dcin2","dcin3","dcin4","dislasts","disto","dmain","dres1","dres2","dth24h","height","HospDmycd","jcs","jcsdis","kacd","nyuseqno","opean1","opean2","opean3","opean4","opean5","opek1","opek2","opek3","opek4","opek5","outcome","preg","pregwkadm","PtDmyCd","readmkind","readmrea","sex","smokingidx","urgency","weight","手術1相対日","手術2相対日","手術3相対日","手術4相対日","手術5相対日","入院期間")]
df4a<- df4[c("adladm","adldis","admpath","age","ambul","brthw","brthwk","bunketu","dadm","dcc1","dcc2","dcc3","dcc4","dcin1","dcin2","dcin3","dcin4","dislasts","disto","dmain","dres1","dres2","dth24h","height","HospDmycd","jcs","jcsdis","kacd","nyuseqno","opean1","opean2","opean3","opean4","opean5","opek1","opek2","opek3","opek4","opek5","outcome","preg","pregwkadm","PtDmyCd","readmkind","readmrea","sex","smokingidx","urgency","weight","手術1相対日","手術2相対日","手術3相対日","手術4相対日","手術5相対日","入院期間")]
df2020_2021<-rbind(df3a,df4a)
unique_patients2 <- length(unique(df2020_2021$PtDmyCd))
unique_patients2
## [1] 86699
df5<-read.csv("_SS_OBbleed_FF1_2022_dmy.csv", fileEncoding = "CP932")
df6<-read.csv("_SS_OBbleed_FF1_R_2022_dmy.csv", fileEncoding = "CP932")
colnames(df5)
## [1] "adladm" "adldis" "admpath" "age"
## [5] "ambul" "brthw" "brthwk" "bunketu"
## [9] "dadm" "dcc1" "dcc2" "dcc3"
## [13] "dcc4" "dcin1" "dcin2" "dcin3"
## [17] "dcin4" "dislasts" "disto" "dmain"
## [21] "dres1" "dres2" "dth24h" "height"
## [25] "HospDmycd" "jcs" "jcsdis" "kacd"
## [29] "nyuseqno" "opean1" "opean2" "opean3"
## [33] "opean4" "opean5" "opek1" "opek2"
## [37] "opek3" "opek4" "opek5" "outcome"
## [41] "preg" "pregwkadm" "PtDmyCd" "readmkind"
## [45] "readmrea" "sex" "smokingidx" "urgency"
## [49] "weight" "マンハッタン距離" "ユークリッド距離" "手術1相対日"
## [53] "手術2相対日" "手術3相対日" "手術4相対日" "手術5相対日"
## [57] "特定機能" "入院期間"
colnames(df6)
## [1] "adladm" "adldis" "admpath" "age" "ambul"
## [6] "brthw" "brthwk" "bunketu" "dadm" "dcc1"
## [11] "dcc2" "dcc3" "dcc4" "dcin1" "dcin2"
## [16] "dcin3" "dcin4" "dislasts" "disto" "dmain"
## [21] "dres1" "dres2" "dth24h" "height" "HospDmycd"
## [26] "idx_nyuseqno" "jcs" "jcsdis" "kacd" "nyuseqno"
## [31] "opean1" "opean2" "opean3" "opean4" "opean5"
## [36] "opek1" "opek2" "opek3" "opek4" "opek5"
## [41] "outcome" "preg" "pregwkadm" "PtDmyCd" "readmkind"
## [46] "readmrea" "sex" "smokingidx" "urgency" "weight"
## [51] "手術1相対日" "手術2相対日" "手術3相対日" "手術4相対日" "手術5相対日"
## [56] "相対入院日" "入院期間"
length(unique(df5$PtDmyCd))
## [1] 41803
length(unique(df6$PtDmyCd))
## [1] 41803
## 2022は41803人のたてもちデータ
common_columns3 <- intersect(colnames(df5), colnames(df6))
print(common_columns3)
## [1] "adladm" "adldis" "admpath" "age" "ambul"
## [6] "brthw" "brthwk" "bunketu" "dadm" "dcc1"
## [11] "dcc2" "dcc3" "dcc4" "dcin1" "dcin2"
## [16] "dcin3" "dcin4" "dislasts" "disto" "dmain"
## [21] "dres1" "dres2" "dth24h" "height" "HospDmycd"
## [26] "jcs" "jcsdis" "kacd" "nyuseqno" "opean1"
## [31] "opean2" "opean3" "opean4" "opean5" "opek1"
## [36] "opek2" "opek3" "opek4" "opek5" "outcome"
## [41] "preg" "pregwkadm" "PtDmyCd" "readmkind" "readmrea"
## [46] "sex" "smokingidx" "urgency" "weight" "手術1相対日"
## [51] "手術2相対日" "手術3相対日" "手術4相対日" "手術5相対日" "入院期間"
df5a<- df5[c("adladm","adldis","admpath","age","ambul","brthw","brthwk","bunketu","dadm","dcc1","dcc2","dcc3","dcc4","dcin1","dcin2","dcin3","dcin4","dislasts","disto","dmain","dres1","dres2","dth24h","height","HospDmycd","jcs","jcsdis","kacd","nyuseqno","opean1","opean2","opean3","opean4","opean5","opek1","opek2","opek3","opek4","opek5","outcome","preg","pregwkadm","PtDmyCd","readmkind","readmrea","sex","smokingidx","urgency","weight","手術1相対日","手術2相対日","手術3相対日","手術4相対日","手術5相対日","入院期間")]
df6a<- df6[c("adladm","adldis","admpath","age","ambul","brthw","brthwk","bunketu","dadm","dcc1","dcc2","dcc3","dcc4","dcin1","dcin2","dcin3","dcin4","dislasts","disto","dmain","dres1","dres2","dth24h","height","HospDmycd","jcs","jcsdis","kacd","nyuseqno","opean1","opean2","opean3","opean4","opean5","opek1","opek2","opek3","opek4","opek5","outcome","preg","pregwkadm","PtDmyCd","readmkind","readmrea","sex","smokingidx","urgency","weight","手術1相対日","手術2相対日","手術3相対日","手術4相対日","手術5相対日","入院期間")]
df2022<-rbind(df5a,df6a)
unique_patients3 <- length(unique(df2022$PtDmyCd))
unique_patients3
## [1] 41803
## 全てのデータの統合
df <- rbind(df2018_2019, df2020_2021, df2022)
length(unique(df$PtDmyCd))
## [1] 214789
colnames(df)
## [1] "adladm" "adldis" "admpath" "age" "ambul"
## [6] "brthw" "brthwk" "bunketu" "dadm" "dcc1"
## [11] "dcc2" "dcc3" "dcc4" "dcin1" "dcin2"
## [16] "dcin3" "dcin4" "dislasts" "disto" "dmain"
## [21] "dres1" "dres2" "dth24h" "height" "HospDmycd"
## [26] "jcs" "jcsdis" "kacd" "nyuseqno" "opean1"
## [31] "opean2" "opean3" "opean4" "opean5" "opek1"
## [36] "opek2" "opek3" "opek4" "opek5" "outcome"
## [41] "preg" "pregwkadm" "PtDmyCd" "readmkind" "readmrea"
## [46] "sex" "smokingidx" "urgency" "weight" "手術1相対日"
## [51] "手術2相対日" "手術3相対日" "手術4相対日" "手術5相対日" "入院期間"
## いろいろな除外の前だが、214789人のたてもちデータフレームが完成
#2. 産科出血患者の抽出
## 抽出定義にのっとり、df内から「産科出血患者」のみを抽出する。
## 条件①病名としてO679, O724, O721, O723のいずれかを含む行の抽出
## grep関数で文字列の正規表現を抽出するが、"|"表現でgrepを結合するとエラーが生じる
## 面倒だが各病名列毎にコードを実行し、重複しないようrbindする
df_dadm<- df[grep("O679|O724|O721|O723", df$dadm),]
df_dcc1<-df[grep("O679|O724|O721|O723", df$dcc1),]
df_dcc2<-df[grep("O679|O724|O721|O723", df$dcc2),]
df_dcc3<-df[grep("O679|O724|O721|O723", df$dcc3),]
df_dcc4<-df[grep("O679|O724|O721|O723", df$dcc4),]
df_dcin1<-df[grep("O679|O724|O721|O723", df$dcin1),]
df_dcin2<-df[grep("O679|O724|O721|O723", df$dcin2),]
df_dcin3<-df[grep("O679|O724|O721|O723", df$dcin3),]
df_dcin4<-df[grep("O679|O724|O721|O723", df$dcin4),]
df_combined1 <- unique(rbind(df_dadm, df_dcc1, df_dcc2, df_dcc3,
df_dcc4, df_dcin1, df_dcin2, df_dcin3,
df_dcin4))
## 条件②分娩時出血量1000以上の症例のみを抽出する
class(df$bunketu)
## [1] "integer"
df_combined2 <-filter(df, bunketu>=1000)
## 最後に、条件①OR条件②で新しいdfを作成
df<-unique(rbind(df_combined1, df_combined2))
sapply(df, class)
## adladm adldis admpath age ambul brthw
## "numeric" "numeric" "integer" "integer" "integer" "integer"
## brthwk bunketu dadm dcc1 dcc2 dcc3
## "integer" "integer" "character" "character" "character" "character"
## dcc4 dcin1 dcin2 dcin3 dcin4 dislasts
## "character" "character" "character" "character" "character" "integer"
## disto dmain dres1 dres2 dth24h height
## "character" "character" "character" "character" "integer" "integer"
## HospDmycd jcs jcsdis kacd nyuseqno opean1
## "integer" "character" "character" "integer" "integer" "integer"
## opean2 opean3 opean4 opean5 opek1 opek2
## "integer" "integer" "integer" "integer" "character" "character"
## opek3 opek4 opek5 outcome preg pregwkadm
## "character" "character" "character" "integer" "integer" "integer"
## PtDmyCd readmkind readmrea sex smokingidx urgency
## "integer" "integer" "integer" "integer" "integer" "integer"
## weight 手術1相対日 手術2相対日 手術3相対日 手術4相対日 手術5相対日
## "numeric" "integer" "integer" "integer" "integer" "integer"
## 入院期間
## "integer"
length(unique(df$PtDmyCd))
## [1] 209555
## 209555人の、産科出血患者が抽出された。
## df内に、"hysterectomy"という子宮全摘の有無を表記する列を追加する
df$hysterectomy <- as.integer(rowSums(sapply(df[c("opek1", "opek2", "opek3", "opek4", "opek5")],
function(x) grepl("K877|K876|K903|K904", x))) > 0)
table(df$hysterectomy)
##
## 0 1
## 213675 1835
hysterectomy_patients <- subset(df, hysterectomy == 1)
unique_hysterectomy_patients <- length(unique(hysterectomy_patients$PtDmyCd))
print(unique_hysterectomy_patients)
## [1] 1835
## 1835人が子宮全摘術を行っていた。
## 同様に、子宮動脈塞栓となった症例の列を追加する。
df$tae<-as.integer(rowSums(sapply(df[c("opek1", "opek2", "opek3", "opek4", "opek5")],
function(x) grepl("K6151|K6153", x))) > 0)
table(df$tae)
##
## 0 1
## 211979 3531
tae_patients <- subset(df, tae == 1)
unique_tae_patients <- length(unique(tae_patients$PtDmyCd))
print(unique_tae_patients)
## [1] 3511
## 3511人がIVRをうけた
## 同様に、帝王切開分娩の症例の列を追加する。
df$cs<-as.integer(rowSums(sapply(df[c("opek1", "opek2", "opek3", "opek4", "opek5")],
function(x) grepl("K8981|K8982", x))) > 0)
table(df$cs)
##
## 0 1
## 88970 126540
cs_patients <- subset(df, cs == 1)
unique_cs_patients <- length(unique(cs_patients$PtDmyCd))
print(unique_cs_patients)
## [1] 124157
# 124157人が帝王切開を受けた
## IVRが施行可能な施設を、タグ付けする。
## taeが"1"となったことがあるHospDmycdを同定する。
df_summary <- df %>%
group_by(HospDmycd) %>%
summarise(total_tae = sum(tae))
df_summary$ivr_shisetsu <- ifelse(df_summary$total_tae >= 1, 1, 0)
colnames(df_summary)
## [1] "HospDmycd" "total_tae" "ivr_shisetsu"
df <- left_join(df, df_summary, by = "HospDmycd")
table(df$ivr_shisetsu)
##
## 0 1
## 55258 160252
ivr_shisetsu_patients <- subset(df, ivr_shisetsu == 1)
unique_ivr_shisetsu_patients <- length(unique(ivr_shisetsu_patients$PtDmyCd))
print(unique_ivr_shisetsu_patients)
## [1] 155685
## 155685人がIVR可能施設での治療となっている
## outcome=6,7が死亡症例であり、deathという変数を追加
df$death<-as.integer(rowSums(sapply(df[c("outcome")],
function(x) grepl("6|7", x))) > 0)
table(df$death)
##
## 0 1
## 215407 103
death_patients <- subset(df, death == 1)
unique_death_patients <- length(unique(death_patients$PtDmyCd))
print(unique_death_patients)
## [1] 103
## 103人が死亡している
#3. 病名の定義(癒着胎盤等)
## 癒着胎盤
df$accreta <- ifelse(grepl("O720|O722|O730", df$dadm) |
grepl("O720|O722|O730", df$dcc1) |
grepl("O720|O722|O730", df$dcc2)|
grepl("O720|O722|O730", df$dcc3)|
grepl("O720|O722|O730", df$dcc4)|
grepl("O720|O722|O730", df$dcin1)|
grepl("O720|O722|O730", df$dcin2)|
grepl("O720|O722|O730", df$dcin3)|
grepl("O720|O722|O730", df$dcin4), 1, 0)
table(df$accreta)
##
## 0 1
## 206570 8940
accreta_patients <- subset(df, accreta == 1)
unique_accreta_patients <- length(unique(accreta_patients$PtDmyCd))
print(unique_accreta_patients)
## [1] 8797
## 8797人が癒着胎盤の診断
## 前置胎盤
df$previa <- ifelse(grepl("O441", df$dadm) |
grepl("O441", df$dcc1) |
grepl("O441", df$dcc2)|
grepl("O441", df$dcc3)|
grepl("O441", df$dcc4)|
grepl("O441", df$dcin1)|
grepl("O441", df$dcin2)|
grepl("O441", df$dcin3)|
grepl("O441", df$dcin4), 1, 0)
table(df$previa)
##
## 0 1
## 200829 14681
previa_patients <- subset(df, previa == 1)
unique_previa_patients <- length(unique(previa_patients$PtDmyCd))
print(unique_previa_patients)
## [1] 14612
#4. データ解析の前の最終チェック
# 連続変数の傾向確認
hist(df$age)
hist(df$bunketu)
hist(df$入院期間)
summary(df$bunketu)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0 1025 1255 9101 1734 99999 3492
df$age <- replace(df$age, df$age <= 15 | df$age > 60, NA)
df$bunketu <- replace(df$bunketu, df$bunketu>=9999 , NA)
df$入院期間 <- replace(df$入院期間, df$入院期間 > 100, NA)
table(df$hysterectomy)
##
## 0 1
## 213675 1835
colnames(df)
## [1] "adladm" "adldis" "admpath" "age" "ambul"
## [6] "brthw" "brthwk" "bunketu" "dadm" "dcc1"
## [11] "dcc2" "dcc3" "dcc4" "dcin1" "dcin2"
## [16] "dcin3" "dcin4" "dislasts" "disto" "dmain"
## [21] "dres1" "dres2" "dth24h" "height" "HospDmycd"
## [26] "jcs" "jcsdis" "kacd" "nyuseqno" "opean1"
## [31] "opean2" "opean3" "opean4" "opean5" "opek1"
## [36] "opek2" "opek3" "opek4" "opek5" "outcome"
## [41] "preg" "pregwkadm" "PtDmyCd" "readmkind" "readmrea"
## [46] "sex" "smokingidx" "urgency" "weight" "手術1相対日"
## [51] "手術2相対日" "手術3相対日" "手術4相対日" "手術5相対日" "入院期間"
## [56] "hysterectomy" "tae" "cs" "total_tae" "ivr_shisetsu"
## [61] "death" "accreta" "previa"
#5. 表1の作成
unique_df <- df %>%
group_by(PtDmyCd) %>%
summarise(入院期間 = max(入院期間), age = max(age), bunketu=max(bunketu),cs=max(cs),death=max(death),tae=max(tae),
ivr_shisetsu=max(ivr_shisetsu),accreta=max(accreta),previa=max(previa),
hysterectomy = max(hysterectomy))
colnames(df)
## [1] "adladm" "adldis" "admpath" "age" "ambul"
## [6] "brthw" "brthwk" "bunketu" "dadm" "dcc1"
## [11] "dcc2" "dcc3" "dcc4" "dcin1" "dcin2"
## [16] "dcin3" "dcin4" "dislasts" "disto" "dmain"
## [21] "dres1" "dres2" "dth24h" "height" "HospDmycd"
## [26] "jcs" "jcsdis" "kacd" "nyuseqno" "opean1"
## [31] "opean2" "opean3" "opean4" "opean5" "opek1"
## [36] "opek2" "opek3" "opek4" "opek5" "outcome"
## [41] "preg" "pregwkadm" "PtDmyCd" "readmkind" "readmrea"
## [46] "sex" "smokingidx" "urgency" "weight" "手術1相対日"
## [51] "手術2相対日" "手術3相対日" "手術4相対日" "手術5相対日" "入院期間"
## [56] "hysterectomy" "tae" "cs" "total_tae" "ivr_shisetsu"
## [61] "death" "accreta" "previa"
CreateTableOne(vars = c("入院期間", "age","bunketu"),
strata = "hysterectomy",addOverall = TRUE,
data = unique_df)
## Stratified by hysterectomy
## Overall 0 1
## n 209555 207720 1835
## 入院期間 (mean (SD)) 12.30 (12.75) 12.22 (12.65) 22.19 (18.82)
## age (mean (SD)) 33.51 (5.38) 33.48 (5.38) 36.36 (4.78)
## bunketu (mean (SD)) 1330.21 (698.85) 1315.47 (660.18) 3125.28 (1908.60)
## Stratified by hysterectomy
## p test
## n
## 入院期間 (mean (SD)) <0.001
## age (mean (SD)) <0.001
## bunketu (mean (SD)) <0.001
table(unique_df$hysterectomy, unique_df$cs)
##
## 0 1
## 0 84348 123372
## 1 1050 785
cross_table <- table(unique_df$hysterectomy, unique_df$cs)
chisq.test(cross_table)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: cross_table
## X-squared = 207.26, df = 1, p-value < 2.2e-16
table(unique_df$hysterectomy, unique_df$death)
##
## 0 1
## 0 207633 87
## 1 1819 16
cross_table2 <- table(unique_df$hysterectomy, unique_df$death)
chisq.test(cross_table2, simulate.p.value = TRUE)
##
## Pearson's Chi-squared test with simulated p-value (based on 2000
## replicates)
##
## data: cross_table2
## X-squared = 255.09, df = NA, p-value = 0.0004998
table(unique_df$hysterectomy, unique_df$tae)
##
## 0 1
## 0 204541 3179
## 1 1503 332
cross_table3 <- table(unique_df$hysterectomy, unique_df$tae)
chisq.test(cross_table3)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: cross_table3
## X-squared = 3018.7, df = 1, p-value < 2.2e-16
table(unique_df$hysterectomy, unique_df$ivr_shisetsu)
##
## 0 1
## 0 53682 154038
## 1 188 1647
cross_table4 <- table(unique_df$hysterectomy, unique_df$ivr_shisetsu)
chisq.test(cross_table4, simulate.p.value = TRUE)
##
## Pearson's Chi-squared test with simulated p-value (based on 2000
## replicates)
##
## data: cross_table4
## X-squared = 231.72, df = NA, p-value = 0.0004998
table(unique_df$hysterectomy, unique_df$accreta)
##
## 0 1
## 0 199654 8066
## 1 1104 731
cross_table5 <- table(unique_df$hysterectomy, unique_df$accreta)
chisq.test(cross_table5, simulate.p.value = TRUE)
##
## Pearson's Chi-squared test with simulated p-value (based on 2000
## replicates)
##
## data: cross_table5
## X-squared = 5846.4, df = NA, p-value = 0.0004998
table(unique_df$hysterectomy, unique_df$previa)
##
## 0 1
## 0 193709 14011
## 1 1234 601
cross_table6 <- table(unique_df$hysterectomy, unique_df$previa)
chisq.test(cross_table6, simulate.p.value = TRUE)
##
## Pearson's Chi-squared test with simulated p-value (based on 2000
## replicates)
##
## data: cross_table6
## X-squared = 1896.6, df = NA, p-value = 0.0004998
colnames(df)
## [1] "adladm" "adldis" "admpath" "age" "ambul"
## [6] "brthw" "brthwk" "bunketu" "dadm" "dcc1"
## [11] "dcc2" "dcc3" "dcc4" "dcin1" "dcin2"
## [16] "dcin3" "dcin4" "dislasts" "disto" "dmain"
## [21] "dres1" "dres2" "dth24h" "height" "HospDmycd"
## [26] "jcs" "jcsdis" "kacd" "nyuseqno" "opean1"
## [31] "opean2" "opean3" "opean4" "opean5" "opek1"
## [36] "opek2" "opek3" "opek4" "opek5" "outcome"
## [41] "preg" "pregwkadm" "PtDmyCd" "readmkind" "readmrea"
## [46] "sex" "smokingidx" "urgency" "weight" "手術1相対日"
## [51] "手術2相対日" "手術3相対日" "手術4相対日" "手術5相対日" "入院期間"
## [56] "hysterectomy" "tae" "cs" "total_tae" "ivr_shisetsu"
## [61] "death" "accreta" "previa"
#6. previaとaccretaを除外したdfを作って解析
patients_with_history <- unique(df$PtDmyCd[df$previa == 1 | df$accreta == 1])
new_df <- df[!(df$PtDmyCd %in% patients_with_history), ]
unique_new_df <- new_df %>%
group_by(PtDmyCd) %>%
summarise(入院期間 = max(入院期間), age = max(age), bunketu=max(bunketu),cs=max(cs),death=max(death),tae=max(tae),
ivr_shisetsu=max(ivr_shisetsu),
hysterectomy = max(hysterectomy))
unique_new_df$cs <- factor(unique_new_df$cs)
unique_new_df$death<- factor(unique_new_df$death)
unique_new_df$tae <- factor(unique_new_df$tae)
unique_new_df$ivr_shisetsu <- factor(unique_new_df$ivr_shisetsu)
vars<-c("入院期間","age","bunketu","cs","death","tae","ivr_shisetsu")
factor_var<-c("cs","death","tae","ivr_shisetsu")
CreateTableOne(vars = vars,
factorVars=factor_var,
includeNA = T,
addOverall = T,
smd = T,
data = unique_new_df,
testExact = fisher.test,
testNonNormal = kruskal.test,
strata = "hysterectomy")
## Stratified by hysterectomy
## Overall 0 1
## n 187085 186273 812
## 入院期間 (mean (SD)) 11.65 (11.99) 11.63 (11.97) 16.43 (15.31)
## age (mean (SD)) 33.38 (5.42) 33.37 (5.42) 35.99 (5.10)
## bunketu (mean (SD)) 1279.51 (643.83) 1273.12 (626.04) 3023.98 (1879.27)
## cs = 1 (%) 109089 (58.3) 108819 (58.4) 270 (33.3)
## death = 1 (%) 101 ( 0.1) 86 ( 0.0) 15 ( 1.8)
## tae = 1 (%) 2240 ( 1.2) 2102 ( 1.1) 138 (17.0)
## ivr_shisetsu = 1 (%) 136928 (73.2) 136212 (73.1) 716 (88.2)
## Stratified by hysterectomy
## p test
## n
## 入院期間 (mean (SD)) <0.001
## age (mean (SD)) <0.001
## bunketu (mean (SD)) <0.001
## cs = 1 (%) <0.001
## death = 1 (%) <0.001
## tae = 1 (%) <0.001
## ivr_shisetsu = 1 (%) <0.001
skim(unique_new_df)
| Name | unique_new_df |
| Number of rows | 187085 |
| Number of columns | 9 |
| _______________________ | |
| Column type frequency: | |
| factor | 4 |
| numeric | 5 |
| ________________________ | |
| Group variables | None |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| cs | 0 | 1 | FALSE | 2 | 1: 109089, 0: 77996 |
| death | 0 | 1 | FALSE | 2 | 0: 186984, 1: 101 |
| tae | 0 | 1 | FALSE | 2 | 0: 184845, 1: 2240 |
| ivr_shisetsu | 0 | 1 | FALSE | 2 | 1: 136928, 0: 50157 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| PtDmyCd | 0 | 1.0 | 19358417.49 | 16085952.05 | 12 | 3593392 | 16435649 | 34557080 | 52982269 | ▇▃▃▃▂ |
| 入院期間 | 578 | 1.0 | 11.65 | 11.99 | 1 | 7 | 8 | 10 | 100 | ▇▁▁▁▁ |
| age | 922 | 1.0 | 33.38 | 5.42 | 16 | 30 | 34 | 37 | 60 | ▁▇▇▁▁ |
| bunketu | 17849 | 0.9 | 1279.51 | 643.83 | 0 | 1000 | 1190 | 1510 | 9990 | ▇▁▁▁▁ |
| hysterectomy | 0 | 1.0 | 0.00 | 0.07 | 0 | 0 | 0 | 0 | 1 | ▇▁▁▁▁ |
table1<-CreateTableOne(vars = vars,
factorVars=factor_var,
includeNA = T,
addOverall = T,
smd = T,
data = unique_new_df,
testExact = fisher.test,
testNonNormal = kruskal.test,
strata = "hysterectomy")
print(table1,smd = T)
## Stratified by hysterectomy
## Overall 0 1
## n 187085 186273 812
## 入院期間 (mean (SD)) 11.65 (11.99) 11.63 (11.97) 16.43 (15.31)
## age (mean (SD)) 33.38 (5.42) 33.37 (5.42) 35.99 (5.10)
## bunketu (mean (SD)) 1279.51 (643.83) 1273.12 (626.04) 3023.98 (1879.27)
## cs = 1 (%) 109089 (58.3) 108819 (58.4) 270 (33.3)
## death = 1 (%) 101 ( 0.1) 86 ( 0.0) 15 ( 1.8)
## tae = 1 (%) 2240 ( 1.2) 2102 ( 1.1) 138 (17.0)
## ivr_shisetsu = 1 (%) 136928 (73.2) 136212 (73.1) 716 (88.2)
## Stratified by hysterectomy
## p test SMD
## n
## 入院期間 (mean (SD)) <0.001 0.349
## age (mean (SD)) <0.001 0.498
## bunketu (mean (SD)) <0.001 1.250
## cs = 1 (%) <0.001 0.522
## death = 1 (%) <0.001 0.187
## tae = 1 (%) <0.001 0.575
## ivr_shisetsu = 1 (%) <0.001 0.388
#5.傾向スコアマッチング
# 傾向スコアをglmを用いて求める
mean_inpatient_duration <- mean(unique_new_df$入院期間, na.rm = TRUE)
unique_new_df$入院期間[is.na(unique_new_df$入院期間)] <- mean_inpatient_duration
mean_inpatient_duration <- mean(unique_new_df$age, na.rm = TRUE)
unique_new_df$age[is.na(unique_new_df$age)] <- mean_inpatient_duration
mean_inpatient_duration <- mean(unique_new_df$bunketu, na.rm = TRUE)
unique_new_df$bunketu[is.na(unique_new_df$bunketu)] <- mean_inpatient_duration
skim(unique_new_df)
| Name | unique_new_df |
| Number of rows | 187085 |
| Number of columns | 9 |
| _______________________ | |
| Column type frequency: | |
| factor | 4 |
| numeric | 5 |
| ________________________ | |
| Group variables | None |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| cs | 0 | 1 | FALSE | 2 | 1: 109089, 0: 77996 |
| death | 0 | 1 | FALSE | 2 | 0: 186984, 1: 101 |
| tae | 0 | 1 | FALSE | 2 | 0: 184845, 1: 2240 |
| ivr_shisetsu | 0 | 1 | FALSE | 2 | 1: 136928, 0: 50157 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| PtDmyCd | 0 | 1 | 19358417.49 | 16085952.05 | 12 | 3593392 | 16435649 | 34557080 | 52982269 | ▇▃▃▃▂ |
| 入院期間 | 0 | 1 | 11.65 | 11.98 | 1 | 7 | 8 | 10 | 100 | ▇▁▁▁▁ |
| age | 0 | 1 | 33.38 | 5.41 | 16 | 30 | 34 | 37 | 60 | ▁▇▇▁▁ |
| bunketu | 0 | 1 | 1279.51 | 612.35 | 0 | 1020 | 1240 | 1469 | 9990 | ▇▁▁▁▁ |
| hysterectomy | 0 | 1 | 0.00 | 0.07 | 0 | 0 | 0 | 0 | 1 | ▇▁▁▁▁ |
propensityScoreModel <- glm(hysterectomy ~ age + ivr_shisetsu + tae,
data = unique_new_df,
family = binomial,
na.action = na.exclude)
ps<-matchit(formula=hysterectomy ~ age + ivr_shisetsu + tae,
data=unique_new_df,
method="nearest",
distance="glm",
caliper=0.1,
ratio=1)
dfPSMatched <- match.data(ps)
CreateTableOne(vars = vars,
factorVars=factor_var,
includeNA = T,
addOverall = T,
smd = T,
data = dfPSMatched,
testExact = fisher.test,
testNonNormal = kruskal.test,
strata = "hysterectomy")
## Stratified by hysterectomy
## Overall 0 1
## n 1624 812 812
## 入院期間 (mean (SD)) 13.90 (13.55) 11.47 (11.19) 16.34 (15.17)
## age (mean (SD)) 35.98 (5.09) 35.98 (5.09) 35.98 (5.09)
## bunketu (mean (SD)) 2034.90 (1546.37) 1462.60 (942.73) 2607.19 (1800.25)
## cs = 1 (%) 738 (45.4) 468 (57.6) 270 (33.3)
## death = 1 (%) 17 ( 1.0) 2 ( 0.2) 15 ( 1.8)
## tae = 1 (%) 276 (17.0) 138 (17.0) 138 (17.0)
## ivr_shisetsu = 1 (%) 1432 (88.2) 716 (88.2) 716 (88.2)
## Stratified by hysterectomy
## p test
## n
## 入院期間 (mean (SD)) <0.001
## age (mean (SD)) 1.000
## bunketu (mean (SD)) <0.001
## cs = 1 (%) <0.001
## death = 1 (%) 0.003
## tae = 1 (%) 1.000
## ivr_shisetsu = 1 (%) 1.000
table2<-CreateTableOne(vars = vars,
factorVars=factor_var,
includeNA = T,
addOverall = T,
smd = T,
data = dfPSMatched,
testExact = fisher.test,
testNonNormal = kruskal.test,
strata = "hysterectomy")
print(table2,smd = T)
## Stratified by hysterectomy
## Overall 0 1
## n 1624 812 812
## 入院期間 (mean (SD)) 13.90 (13.55) 11.47 (11.19) 16.34 (15.17)
## age (mean (SD)) 35.98 (5.09) 35.98 (5.09) 35.98 (5.09)
## bunketu (mean (SD)) 2034.90 (1546.37) 1462.60 (942.73) 2607.19 (1800.25)
## cs = 1 (%) 738 (45.4) 468 (57.6) 270 (33.3)
## death = 1 (%) 17 ( 1.0) 2 ( 0.2) 15 ( 1.8)
## tae = 1 (%) 276 (17.0) 138 (17.0) 138 (17.0)
## ivr_shisetsu = 1 (%) 1432 (88.2) 716 (88.2) 716 (88.2)
## Stratified by hysterectomy
## p test SMD
## n
## 入院期間 (mean (SD)) <0.001 0.365
## age (mean (SD)) 1.000 <0.001
## bunketu (mean (SD)) <0.001 0.797
## cs = 1 (%) <0.001 0.505
## death = 1 (%) 0.003 0.158
## tae = 1 (%) 1.000 <0.001
## ivr_shisetsu = 1 (%) 1.000 <0.001
#今後の修正点
#欠測補完をとりあえず適当に平均代入法で行っているためこれを修正すること