Untitled

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

#0 必要パッケージのDL
install.packages("tidyverse")

## The following package(s) will be installed:
## - tidyverse [2.0.0]
## These packages will be installed into "G:/マイドライブ/HDS/清水ゼミ/子宮全摘の因果推論/DPC_2024/renv/library/R-4.3/x86_64-w64-mingw32".
## 
## # Installing packages --------------------------------------------------------
## - Installing tidyverse ...                      OK [copied from cache in 1.0s]
## Successfully installed 1 package in 1.2 seconds.

install.packages("MatchIt")

## The following package(s) will be installed:
## - MatchIt [4.5.5]
## These packages will be installed into "G:/マイドライブ/HDS/清水ゼミ/子宮全摘の因果推論/DPC_2024/renv/library/R-4.3/x86_64-w64-mingw32".
## 
## # Installing packages --------------------------------------------------------
## - Installing MatchIt ...                        OK [copied from cache in 1.6s]
## Successfully installed 1 package in 2 seconds.

install.packages("twang")

## The following package(s) will be installed:
## - twang [2.6]
## These packages will be installed into "G:/マイドライブ/HDS/清水ゼミ/子宮全摘の因果推論/DPC_2024/renv/library/R-4.3/x86_64-w64-mingw32".
## 
## # Installing packages --------------------------------------------------------
## - Installing twang ...                          OK [copied from cache in 1.2s]
## Successfully installed 1 package in 1.6 seconds.

install.packages("Matching")

## The following package(s) will be installed:
## - Matching [4.10-14]
## These packages will be installed into "G:/マイドライブ/HDS/清水ゼミ/子宮全摘の因果推論/DPC_2024/renv/library/R-4.3/x86_64-w64-mingw32".
## 
## # Installing packages --------------------------------------------------------
## - Installing Matching ...                       OK [copied from cache in 1.2s]
## Successfully installed 1 package in 1.5 seconds.

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(skimr)
library(tableone)
library(ggplot2)
library(dplyr)
library(MatchIt)
library(Matching)

##  要求されたパッケージ MASS をロード中です 
## 
##  次のパッケージを付け加えます: 'MASS' 
## 
##  以下のオブジェクトは 'package:dplyr' からマスクされています:
## 
##     select
## 
## ## 
## ##  Matching (Version 4.10-14, Build Date: 2023-09-13)
## ##  See https://www.jsekhon.com for additional documentation.
## ##  Please cite software as:
## ##   Jasjeet S. Sekhon. 2011. ``Multivariate and Propensity Score Matching
## ##   Software with Automated Balance Optimization: The Matching package for R.''
## ##   Journal of Statistical Software, 42(7): 1-52. 
## ##

#1.データクリーニング
df1<-read.csv("_SS_OBbleed_FF1_2018_2019_dmy.csv", fileEncoding = "CP932")
df2<-read.csv("_SS_OBbleed_FF1_R_2018_2019_dmy.csv", fileEncoding = "CP932")
colnames(df1)

##  [1] "adladm"           "adldis"           "admpath"          "age"             
##  [5] "ambul"            "brthw"            "brthwk"           "bunketu"         
##  [9] "dadm"             "dcc1"             "dcc2"             "dcc3"            
## [13] "dcc4"             "dcin1"            "dcin2"            "dcin3"           
## [17] "dcin4"            "dislasts"         "disto"            "dmain"           
## [21] "dres1"            "dres2"            "dth24h"           "height"          
## [25] "HospDmycd"        "jcs"              "jcsdis"           "kacd"            
## [29] "nyuseqno"         "opean1"           "opean2"           "opean3"          
## [33] "opean4"           "opean5"           "opek1"            "opek2"           
## [37] "opek3"            "opek4"            "opek5"            "outcome"         
## [41] "preg"             "pregwkadm"        "PtDmyCd"          "readmkind"       
## [45] "readmrea"         "sex"              "smokingidx"       "urgency"         
## [49] "weight"           "マンハッタン距離" "ユークリッド距離" "手術1相対日"     
## [53] "手術2相対日"      "手術3相対日"      "手術4相対日"      "手術5相対日"     
## [57] "特定機能"         "入院期間"         "年度"

colnames(df2)

##  [1] "adladm"       "adldis"       "admpath"      "age"          "ambul"       
##  [6] "brthw"        "brthwk"       "bunketu"      "dadm"         "dcc1"        
## [11] "dcc2"         "dcc3"         "dcc4"         "dcin1"        "dcin2"       
## [16] "dcin3"        "dcin4"        "dislasts"     "disto"        "dmain"       
## [21] "dres1"        "dres2"        "dth24h"       "height"       "HospDmycd"   
## [26] "idx_nyuseqno" "jcs"          "jcsdis"       "kacd"         "nyuseqno"    
## [31] "opean1"       "opean2"       "opean3"       "opean4"       "opean5"      
## [36] "opek1"        "opek2"        "opek3"        "opek4"        "opek5"       
## [41] "outcome"      "preg"         "pregwkadm"    "PtDmyCd"      "readmkind"   
## [46] "readmrea"     "sex"          "smokingidx"   "urgency"      "weight"      
## [51] "手術1相対日"  "手術2相対日"  "手術3相対日"  "手術4相対日"  "手術5相対日" 
## [56] "相対入院日"   "入院期間"

length(unique(df1$PtDmyCd))

## [1] 89899

length(unique(df2$PtDmyCd))

## [1] 89899

## 2018-2019は89899人のたてもちデータ
common_columns <- intersect(colnames(df1), colnames(df2))
print(common_columns)

##  [1] "adladm"      "adldis"      "admpath"     "age"         "ambul"      
##  [6] "brthw"       "brthwk"      "bunketu"     "dadm"        "dcc1"       
## [11] "dcc2"        "dcc3"        "dcc4"        "dcin1"       "dcin2"      
## [16] "dcin3"       "dcin4"       "dislasts"    "disto"       "dmain"      
## [21] "dres1"       "dres2"       "dth24h"      "height"      "HospDmycd"  
## [26] "jcs"         "jcsdis"      "kacd"        "nyuseqno"    "opean1"     
## [31] "opean2"      "opean3"      "opean4"      "opean5"      "opek1"      
## [36] "opek2"       "opek3"       "opek4"       "opek5"       "outcome"    
## [41] "preg"        "pregwkadm"   "PtDmyCd"     "readmkind"   "readmrea"   
## [46] "sex"         "smokingidx"  "urgency"     "weight"      "手術1相対日"
## [51] "手術2相対日" "手術3相対日" "手術4相対日" "手術5相対日" "入院期間"

df1a<- df1[c("adladm","adldis","admpath","age","ambul","brthw","brthwk","bunketu","dadm","dcc1","dcc2","dcc3","dcc4","dcin1","dcin2","dcin3","dcin4","dislasts","disto","dmain","dres1","dres2","dth24h","height","HospDmycd","jcs","jcsdis","kacd","nyuseqno","opean1","opean2","opean3","opean4","opean5","opek1","opek2","opek3","opek4","opek5","outcome","preg","pregwkadm","PtDmyCd","readmkind","readmrea","sex","smokingidx","urgency","weight","手術1相対日","手術2相対日","手術3相対日","手術4相対日","手術5相対日","入院期間")]
df2a<- df2[c("adladm","adldis","admpath","age","ambul","brthw","brthwk","bunketu","dadm","dcc1","dcc2","dcc3","dcc4","dcin1","dcin2","dcin3","dcin4","dislasts","disto","dmain","dres1","dres2","dth24h","height","HospDmycd","jcs","jcsdis","kacd","nyuseqno","opean1","opean2","opean3","opean4","opean5","opek1","opek2","opek3","opek4","opek5","outcome","preg","pregwkadm","PtDmyCd","readmkind","readmrea","sex","smokingidx","urgency","weight","手術1相対日","手術2相対日","手術3相対日","手術4相対日","手術5相対日","入院期間")]
df2018_2019<-rbind(df1a,df2a)
unique_patients <- length(unique(df2018_2019$PtDmyCd))
unique_patients

## [1] 89899

## 複数回入院した患者も全て含む。

df3<-read.csv("_SS_OBbleed_FF1_2020_2021_dmy.csv", fileEncoding = "CP932")
df4<-read.csv("_SS_OBbleed_FF1_R_2020_2021_dmy.csv", fileEncoding = "CP932")
colnames(df3)

##  [1] "adladm"           "adldis"           "admpath"          "age"             
##  [5] "ambul"            "brthw"            "brthwk"           "bunketu"         
##  [9] "dadm"             "dcc1"             "dcc2"             "dcc3"            
## [13] "dcc4"             "dcin1"            "dcin2"            "dcin3"           
## [17] "dcin4"            "dislasts"         "disto"            "dmain"           
## [21] "dres1"            "dres2"            "dth24h"           "height"          
## [25] "HospDmycd"        "jcs"              "jcsdis"           "kacd"            
## [29] "nyuseqno"         "opean1"           "opean2"           "opean3"          
## [33] "opean4"           "opean5"           "opek1"            "opek2"           
## [37] "opek3"            "opek4"            "opek5"            "outcome"         
## [41] "preg"             "pregwkadm"        "PtDmyCd"          "readmkind"       
## [45] "readmrea"         "sex"              "smokingidx"       "urgency"         
## [49] "weight"           "マンハッタン距離" "ユークリッド距離" "手術1相対日"     
## [53] "手術2相対日"      "手術3相対日"      "手術4相対日"      "手術5相対日"     
## [57] "特定機能"         "入院期間"         "年度"

colnames(df4)

##  [1] "adladm"       "adldis"       "admpath"      "age"          "ambul"       
##  [6] "brthw"        "brthwk"       "bunketu"      "dadm"         "dcc1"        
## [11] "dcc2"         "dcc3"         "dcc4"         "dcin1"        "dcin2"       
## [16] "dcin3"        "dcin4"        "dislasts"     "disto"        "dmain"       
## [21] "dres1"        "dres2"        "dth24h"       "height"       "HospDmycd"   
## [26] "idx_nyuseqno" "jcs"          "jcsdis"       "kacd"         "nyuseqno"    
## [31] "opean1"       "opean2"       "opean3"       "opean4"       "opean5"      
## [36] "opek1"        "opek2"        "opek3"        "opek4"        "opek5"       
## [41] "outcome"      "preg"         "pregwkadm"    "PtDmyCd"      "readmkind"   
## [46] "readmrea"     "sex"          "smokingidx"   "urgency"      "weight"      
## [51] "手術1相対日"  "手術2相対日"  "手術3相対日"  "手術4相対日"  "手術5相対日" 
## [56] "相対入院日"   "入院期間"

length(unique(df3$PtDmyCd))

## [1] 86699

length(unique(df4$PtDmyCd))

## [1] 86699

## 2020-2021は86699人のたてもちデータ
common_columns2 <- intersect(colnames(df3), colnames(df4))
print(common_columns2)

##  [1] "adladm"      "adldis"      "admpath"     "age"         "ambul"      
##  [6] "brthw"       "brthwk"      "bunketu"     "dadm"        "dcc1"       
## [11] "dcc2"        "dcc3"        "dcc4"        "dcin1"       "dcin2"      
## [16] "dcin3"       "dcin4"       "dislasts"    "disto"       "dmain"      
## [21] "dres1"       "dres2"       "dth24h"      "height"      "HospDmycd"  
## [26] "jcs"         "jcsdis"      "kacd"        "nyuseqno"    "opean1"     
## [31] "opean2"      "opean3"      "opean4"      "opean5"      "opek1"      
## [36] "opek2"       "opek3"       "opek4"       "opek5"       "outcome"    
## [41] "preg"        "pregwkadm"   "PtDmyCd"     "readmkind"   "readmrea"   
## [46] "sex"         "smokingidx"  "urgency"     "weight"      "手術1相対日"
## [51] "手術2相対日" "手術3相対日" "手術4相対日" "手術5相対日" "入院期間"

df3a<- df3[c("adladm","adldis","admpath","age","ambul","brthw","brthwk","bunketu","dadm","dcc1","dcc2","dcc3","dcc4","dcin1","dcin2","dcin3","dcin4","dislasts","disto","dmain","dres1","dres2","dth24h","height","HospDmycd","jcs","jcsdis","kacd","nyuseqno","opean1","opean2","opean3","opean4","opean5","opek1","opek2","opek3","opek4","opek5","outcome","preg","pregwkadm","PtDmyCd","readmkind","readmrea","sex","smokingidx","urgency","weight","手術1相対日","手術2相対日","手術3相対日","手術4相対日","手術5相対日","入院期間")]
df4a<- df4[c("adladm","adldis","admpath","age","ambul","brthw","brthwk","bunketu","dadm","dcc1","dcc2","dcc3","dcc4","dcin1","dcin2","dcin3","dcin4","dislasts","disto","dmain","dres1","dres2","dth24h","height","HospDmycd","jcs","jcsdis","kacd","nyuseqno","opean1","opean2","opean3","opean4","opean5","opek1","opek2","opek3","opek4","opek5","outcome","preg","pregwkadm","PtDmyCd","readmkind","readmrea","sex","smokingidx","urgency","weight","手術1相対日","手術2相対日","手術3相対日","手術4相対日","手術5相対日","入院期間")]
df2020_2021<-rbind(df3a,df4a)
unique_patients2 <- length(unique(df2020_2021$PtDmyCd))
unique_patients2

## [1] 86699

df5<-read.csv("_SS_OBbleed_FF1_2022_dmy.csv", fileEncoding = "CP932")
df6<-read.csv("_SS_OBbleed_FF1_R_2022_dmy.csv", fileEncoding = "CP932")
colnames(df5)

##  [1] "adladm"           "adldis"           "admpath"          "age"             
##  [5] "ambul"            "brthw"            "brthwk"           "bunketu"         
##  [9] "dadm"             "dcc1"             "dcc2"             "dcc3"            
## [13] "dcc4"             "dcin1"            "dcin2"            "dcin3"           
## [17] "dcin4"            "dislasts"         "disto"            "dmain"           
## [21] "dres1"            "dres2"            "dth24h"           "height"          
## [25] "HospDmycd"        "jcs"              "jcsdis"           "kacd"            
## [29] "nyuseqno"         "opean1"           "opean2"           "opean3"          
## [33] "opean4"           "opean5"           "opek1"            "opek2"           
## [37] "opek3"            "opek4"            "opek5"            "outcome"         
## [41] "preg"             "pregwkadm"        "PtDmyCd"          "readmkind"       
## [45] "readmrea"         "sex"              "smokingidx"       "urgency"         
## [49] "weight"           "マンハッタン距離" "ユークリッド距離" "手術1相対日"     
## [53] "手術2相対日"      "手術3相対日"      "手術4相対日"      "手術5相対日"     
## [57] "特定機能"         "入院期間"

colnames(df6)

##  [1] "adladm"       "adldis"       "admpath"      "age"          "ambul"       
##  [6] "brthw"        "brthwk"       "bunketu"      "dadm"         "dcc1"        
## [11] "dcc2"         "dcc3"         "dcc4"         "dcin1"        "dcin2"       
## [16] "dcin3"        "dcin4"        "dislasts"     "disto"        "dmain"       
## [21] "dres1"        "dres2"        "dth24h"       "height"       "HospDmycd"   
## [26] "idx_nyuseqno" "jcs"          "jcsdis"       "kacd"         "nyuseqno"    
## [31] "opean1"       "opean2"       "opean3"       "opean4"       "opean5"      
## [36] "opek1"        "opek2"        "opek3"        "opek4"        "opek5"       
## [41] "outcome"      "preg"         "pregwkadm"    "PtDmyCd"      "readmkind"   
## [46] "readmrea"     "sex"          "smokingidx"   "urgency"      "weight"      
## [51] "手術1相対日"  "手術2相対日"  "手術3相対日"  "手術4相対日"  "手術5相対日" 
## [56] "相対入院日"   "入院期間"

length(unique(df5$PtDmyCd))

## [1] 41803

length(unique(df6$PtDmyCd))

## [1] 41803

## 2022は41803人のたてもちデータ
common_columns3 <- intersect(colnames(df5), colnames(df6))
print(common_columns3)

##  [1] "adladm"      "adldis"      "admpath"     "age"         "ambul"      
##  [6] "brthw"       "brthwk"      "bunketu"     "dadm"        "dcc1"       
## [11] "dcc2"        "dcc3"        "dcc4"        "dcin1"       "dcin2"      
## [16] "dcin3"       "dcin4"       "dislasts"    "disto"       "dmain"      
## [21] "dres1"       "dres2"       "dth24h"      "height"      "HospDmycd"  
## [26] "jcs"         "jcsdis"      "kacd"        "nyuseqno"    "opean1"     
## [31] "opean2"      "opean3"      "opean4"      "opean5"      "opek1"      
## [36] "opek2"       "opek3"       "opek4"       "opek5"       "outcome"    
## [41] "preg"        "pregwkadm"   "PtDmyCd"     "readmkind"   "readmrea"   
## [46] "sex"         "smokingidx"  "urgency"     "weight"      "手術1相対日"
## [51] "手術2相対日" "手術3相対日" "手術4相対日" "手術5相対日" "入院期間"

df5a<- df5[c("adladm","adldis","admpath","age","ambul","brthw","brthwk","bunketu","dadm","dcc1","dcc2","dcc3","dcc4","dcin1","dcin2","dcin3","dcin4","dislasts","disto","dmain","dres1","dres2","dth24h","height","HospDmycd","jcs","jcsdis","kacd","nyuseqno","opean1","opean2","opean3","opean4","opean5","opek1","opek2","opek3","opek4","opek5","outcome","preg","pregwkadm","PtDmyCd","readmkind","readmrea","sex","smokingidx","urgency","weight","手術1相対日","手術2相対日","手術3相対日","手術4相対日","手術5相対日","入院期間")]
df6a<- df6[c("adladm","adldis","admpath","age","ambul","brthw","brthwk","bunketu","dadm","dcc1","dcc2","dcc3","dcc4","dcin1","dcin2","dcin3","dcin4","dislasts","disto","dmain","dres1","dres2","dth24h","height","HospDmycd","jcs","jcsdis","kacd","nyuseqno","opean1","opean2","opean3","opean4","opean5","opek1","opek2","opek3","opek4","opek5","outcome","preg","pregwkadm","PtDmyCd","readmkind","readmrea","sex","smokingidx","urgency","weight","手術1相対日","手術2相対日","手術3相対日","手術4相対日","手術5相対日","入院期間")]
df2022<-rbind(df5a,df6a)
unique_patients3 <- length(unique(df2022$PtDmyCd))
unique_patients3

## [1] 41803

##　全てのデータの統合

df <- rbind(df2018_2019, df2020_2021, df2022)
length(unique(df$PtDmyCd))

## [1] 214789

colnames(df)

##  [1] "adladm"      "adldis"      "admpath"     "age"         "ambul"      
##  [6] "brthw"       "brthwk"      "bunketu"     "dadm"        "dcc1"       
## [11] "dcc2"        "dcc3"        "dcc4"        "dcin1"       "dcin2"      
## [16] "dcin3"       "dcin4"       "dislasts"    "disto"       "dmain"      
## [21] "dres1"       "dres2"       "dth24h"      "height"      "HospDmycd"  
## [26] "jcs"         "jcsdis"      "kacd"        "nyuseqno"    "opean1"     
## [31] "opean2"      "opean3"      "opean4"      "opean5"      "opek1"      
## [36] "opek2"       "opek3"       "opek4"       "opek5"       "outcome"    
## [41] "preg"        "pregwkadm"   "PtDmyCd"     "readmkind"   "readmrea"   
## [46] "sex"         "smokingidx"  "urgency"     "weight"      "手術1相対日"
## [51] "手術2相対日" "手術3相対日" "手術4相対日" "手術5相対日" "入院期間"

## いろいろな除外の前だが、214789人のたてもちデータフレームが完成

#2. 産科出血患者の抽出


## 抽出定義にのっとり、df内から「産科出血患者」のみを抽出する。

## 条件①病名としてO679, O724, O721, O723のいずれかを含む行の抽出
## grep関数で文字列の正規表現を抽出するが、"|"表現でgrepを結合するとエラーが生じる
## 面倒だが各病名列毎にコードを実行し、重複しないようrbindする

df_dadm<- df[grep("O679|O724|O721|O723", df$dadm),]
df_dcc1<-df[grep("O679|O724|O721|O723", df$dcc1),] 
df_dcc2<-df[grep("O679|O724|O721|O723", df$dcc2),] 
df_dcc3<-df[grep("O679|O724|O721|O723", df$dcc3),]
df_dcc4<-df[grep("O679|O724|O721|O723", df$dcc4),]
df_dcin1<-df[grep("O679|O724|O721|O723", df$dcin1),]
df_dcin2<-df[grep("O679|O724|O721|O723", df$dcin2),]
df_dcin3<-df[grep("O679|O724|O721|O723", df$dcin3),]
df_dcin4<-df[grep("O679|O724|O721|O723", df$dcin4),]

df_combined1 <- unique(rbind(df_dadm, df_dcc1, df_dcc2, df_dcc3, 
                             df_dcc4, df_dcin1, df_dcin2, df_dcin3,
                             df_dcin4))

##　条件②分娩時出血量1000以上の症例のみを抽出する
class(df$bunketu)

## [1] "integer"

df_combined2 <-filter(df, bunketu>=1000)
## 最後に、条件①OR条件②で新しいdfを作成
df<-unique(rbind(df_combined1, df_combined2))
sapply(df, class)

##      adladm      adldis     admpath         age       ambul       brthw 
##   "numeric"   "numeric"   "integer"   "integer"   "integer"   "integer" 
##      brthwk     bunketu        dadm        dcc1        dcc2        dcc3 
##   "integer"   "integer" "character" "character" "character" "character" 
##        dcc4       dcin1       dcin2       dcin3       dcin4    dislasts 
## "character" "character" "character" "character" "character"   "integer" 
##       disto       dmain       dres1       dres2      dth24h      height 
## "character" "character" "character" "character"   "integer"   "integer" 
##   HospDmycd         jcs      jcsdis        kacd    nyuseqno      opean1 
##   "integer" "character" "character"   "integer"   "integer"   "integer" 
##      opean2      opean3      opean4      opean5       opek1       opek2 
##   "integer"   "integer"   "integer"   "integer" "character" "character" 
##       opek3       opek4       opek5     outcome        preg   pregwkadm 
## "character" "character" "character"   "integer"   "integer"   "integer" 
##     PtDmyCd   readmkind    readmrea         sex  smokingidx     urgency 
##   "integer"   "integer"   "integer"   "integer"   "integer"   "integer" 
##      weight 手術1相対日 手術2相対日 手術3相対日 手術4相対日 手術5相対日 
##   "numeric"   "integer"   "integer"   "integer"   "integer"   "integer" 
##    入院期間 
##   "integer"

length(unique(df$PtDmyCd))

## [1] 209555

## 209555人の、産科出血患者が抽出された。

## df内に、"hysterectomy"という子宮全摘の有無を表記する列を追加する

df$hysterectomy <- as.integer(rowSums(sapply(df[c("opek1", "opek2", "opek3", "opek4", "opek5")], 
                                             function(x) grepl("K877|K876|K903|K904", x))) > 0)
table(df$hysterectomy)

## 
##      0      1 
## 213675   1835

hysterectomy_patients <- subset(df, hysterectomy == 1)
unique_hysterectomy_patients <- length(unique(hysterectomy_patients$PtDmyCd))
print(unique_hysterectomy_patients)

## [1] 1835

## 1835人が子宮全摘術を行っていた。

## 同様に、子宮動脈塞栓となった症例の列を追加する。
df$tae<-as.integer(rowSums(sapply(df[c("opek1", "opek2", "opek3", "opek4", "opek5")], 
                                  function(x) grepl("K6151|K6153", x))) > 0)
table(df$tae)

## 
##      0      1 
## 211979   3531

tae_patients <- subset(df, tae == 1)
unique_tae_patients <- length(unique(tae_patients$PtDmyCd))
print(unique_tae_patients)

## [1] 3511

## 3511人がIVRをうけた

## 同様に、帝王切開分娩の症例の列を追加する。
df$cs<-as.integer(rowSums(sapply(df[c("opek1", "opek2", "opek3", "opek4", "opek5")], 
                                 function(x) grepl("K8981|K8982", x))) > 0)
table(df$cs)

## 
##      0      1 
##  88970 126540

cs_patients <- subset(df, cs == 1)
unique_cs_patients <- length(unique(cs_patients$PtDmyCd))
print(unique_cs_patients)

## [1] 124157

#　124157人が帝王切開を受けた

## IVRが施行可能な施設を、タグ付けする。
## taeが"1"となったことがあるHospDmycdを同定する。
df_summary <- df %>% 
  group_by(HospDmycd) %>% 
  summarise(total_tae = sum(tae))
df_summary$ivr_shisetsu <- ifelse(df_summary$total_tae >= 1, 1, 0)
colnames(df_summary)

## [1] "HospDmycd"    "total_tae"    "ivr_shisetsu"

df <- left_join(df, df_summary, by = "HospDmycd")
table(df$ivr_shisetsu)

## 
##      0      1 
##  55258 160252

ivr_shisetsu_patients <- subset(df, ivr_shisetsu == 1)
unique_ivr_shisetsu_patients <- length(unique(ivr_shisetsu_patients$PtDmyCd))
print(unique_ivr_shisetsu_patients)

## [1] 155685

## 155685人がIVR可能施設での治療となっている

## outcome=6,7が死亡症例であり、deathという変数を追加
df$death<-as.integer(rowSums(sapply(df[c("outcome")], 
                                 function(x) grepl("6|7", x))) > 0)
table(df$death)

## 
##      0      1 
## 215407    103

death_patients <- subset(df, death == 1)
unique_death_patients <- length(unique(death_patients$PtDmyCd))
print(unique_death_patients)

## [1] 103

## 103人が死亡している

#3. 病名の定義(癒着胎盤等)
## 癒着胎盤

df$accreta <- ifelse(grepl("O720|O722|O730", df$dadm) | 
                       grepl("O720|O722|O730", df$dcc1) | 
                       grepl("O720|O722|O730", df$dcc2)|
                       grepl("O720|O722|O730", df$dcc3)|
                       grepl("O720|O722|O730", df$dcc4)|
                       grepl("O720|O722|O730", df$dcin1)|
                       grepl("O720|O722|O730", df$dcin2)|
                       grepl("O720|O722|O730", df$dcin3)|
                       grepl("O720|O722|O730", df$dcin4), 1, 0)
table(df$accreta)

## 
##      0      1 
## 206570   8940

accreta_patients <- subset(df, accreta == 1)
unique_accreta_patients <- length(unique(accreta_patients$PtDmyCd))
print(unique_accreta_patients)

## [1] 8797

## 8797人が癒着胎盤の診断

## 前置胎盤
df$previa <- ifelse(grepl("O441", df$dadm) | 
                       grepl("O441", df$dcc1) | 
                       grepl("O441", df$dcc2)|
                       grepl("O441", df$dcc3)|
                       grepl("O441", df$dcc4)|
                       grepl("O441", df$dcin1)|
                       grepl("O441", df$dcin2)|
                       grepl("O441", df$dcin3)|
                       grepl("O441", df$dcin4), 1, 0)
table(df$previa)

## 
##      0      1 
## 200829  14681

previa_patients <- subset(df, previa == 1)
unique_previa_patients <- length(unique(previa_patients$PtDmyCd))
print(unique_previa_patients)

## [1] 14612

#4. データ解析の前の最終チェック
# 連続変数の傾向確認
hist(df$age)

hist(df$bunketu)

hist(df$入院期間)

summary(df$bunketu)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##       0    1025    1255    9101    1734   99999    3492

df$age <- replace(df$age, df$age <= 15 | df$age > 60, NA)
df$bunketu <- replace(df$bunketu, df$bunketu>=9999 , NA)
df$入院期間 <- replace(df$入院期間, df$入院期間 > 100, NA)
table(df$hysterectomy)

## 
##      0      1 
## 213675   1835

colnames(df)

##  [1] "adladm"       "adldis"       "admpath"      "age"          "ambul"       
##  [6] "brthw"        "brthwk"       "bunketu"      "dadm"         "dcc1"        
## [11] "dcc2"         "dcc3"         "dcc4"         "dcin1"        "dcin2"       
## [16] "dcin3"        "dcin4"        "dislasts"     "disto"        "dmain"       
## [21] "dres1"        "dres2"        "dth24h"       "height"       "HospDmycd"   
## [26] "jcs"          "jcsdis"       "kacd"         "nyuseqno"     "opean1"      
## [31] "opean2"       "opean3"       "opean4"       "opean5"       "opek1"       
## [36] "opek2"        "opek3"        "opek4"        "opek5"        "outcome"     
## [41] "preg"         "pregwkadm"    "PtDmyCd"      "readmkind"    "readmrea"    
## [46] "sex"          "smokingidx"   "urgency"      "weight"       "手術1相対日" 
## [51] "手術2相対日"  "手術3相対日"  "手術4相対日"  "手術5相対日"  "入院期間"    
## [56] "hysterectomy" "tae"          "cs"           "total_tae"    "ivr_shisetsu"
## [61] "death"        "accreta"      "previa"

#5. 表1の作成
unique_df <- df %>%
  group_by(PtDmyCd) %>%
  summarise(入院期間 = max(入院期間), age = max(age), bunketu=max(bunketu),cs=max(cs),death=max(death),tae=max(tae),
            ivr_shisetsu=max(ivr_shisetsu),accreta=max(accreta),previa=max(previa),
            hysterectomy = max(hysterectomy))

colnames(df)

##  [1] "adladm"       "adldis"       "admpath"      "age"          "ambul"       
##  [6] "brthw"        "brthwk"       "bunketu"      "dadm"         "dcc1"        
## [11] "dcc2"         "dcc3"         "dcc4"         "dcin1"        "dcin2"       
## [16] "dcin3"        "dcin4"        "dislasts"     "disto"        "dmain"       
## [21] "dres1"        "dres2"        "dth24h"       "height"       "HospDmycd"   
## [26] "jcs"          "jcsdis"       "kacd"         "nyuseqno"     "opean1"      
## [31] "opean2"       "opean3"       "opean4"       "opean5"       "opek1"       
## [36] "opek2"        "opek3"        "opek4"        "opek5"        "outcome"     
## [41] "preg"         "pregwkadm"    "PtDmyCd"      "readmkind"    "readmrea"    
## [46] "sex"          "smokingidx"   "urgency"      "weight"       "手術1相対日" 
## [51] "手術2相対日"  "手術3相対日"  "手術4相対日"  "手術5相対日"  "入院期間"    
## [56] "hysterectomy" "tae"          "cs"           "total_tae"    "ivr_shisetsu"
## [61] "death"        "accreta"      "previa"

CreateTableOne(vars = c("入院期間", "age","bunketu"),
                 strata = "hysterectomy",addOverall = TRUE,
                 data = unique_df)

##                       Stratified by hysterectomy
##                        Overall          0                1                
##   n                     209555           207720             1835          
##   入院期間 (mean (SD))   12.30 (12.75)    12.22 (12.65)    22.19 (18.82)  
##   age (mean (SD))        33.51 (5.38)     33.48 (5.38)     36.36 (4.78)   
##   bunketu (mean (SD))  1330.21 (698.85) 1315.47 (660.18) 3125.28 (1908.60)
##                       Stratified by hysterectomy
##                        p      test
##   n                               
##   入院期間 (mean (SD)) <0.001     
##   age (mean (SD))      <0.001     
##   bunketu (mean (SD))  <0.001

table(unique_df$hysterectomy, unique_df$cs)

##    
##          0      1
##   0  84348 123372
##   1   1050    785

cross_table <- table(unique_df$hysterectomy, unique_df$cs)
chisq.test(cross_table)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  cross_table
## X-squared = 207.26, df = 1, p-value < 2.2e-16

table(unique_df$hysterectomy, unique_df$death)

##    
##          0      1
##   0 207633     87
##   1   1819     16

cross_table2 <- table(unique_df$hysterectomy, unique_df$death)
chisq.test(cross_table2, simulate.p.value = TRUE)

## 
##  Pearson's Chi-squared test with simulated p-value (based on 2000
##  replicates)
## 
## data:  cross_table2
## X-squared = 255.09, df = NA, p-value = 0.0004998

table(unique_df$hysterectomy, unique_df$tae)

##    
##          0      1
##   0 204541   3179
##   1   1503    332

cross_table3 <- table(unique_df$hysterectomy, unique_df$tae)
chisq.test(cross_table3)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  cross_table3
## X-squared = 3018.7, df = 1, p-value < 2.2e-16

table(unique_df$hysterectomy, unique_df$ivr_shisetsu)

##    
##          0      1
##   0  53682 154038
##   1    188   1647

cross_table4 <- table(unique_df$hysterectomy, unique_df$ivr_shisetsu)
chisq.test(cross_table4, simulate.p.value = TRUE)

## 
##  Pearson's Chi-squared test with simulated p-value (based on 2000
##  replicates)
## 
## data:  cross_table4
## X-squared = 231.72, df = NA, p-value = 0.0004998

table(unique_df$hysterectomy, unique_df$accreta)

##    
##          0      1
##   0 199654   8066
##   1   1104    731

cross_table5 <- table(unique_df$hysterectomy, unique_df$accreta)
chisq.test(cross_table5, simulate.p.value = TRUE)

## 
##  Pearson's Chi-squared test with simulated p-value (based on 2000
##  replicates)
## 
## data:  cross_table5
## X-squared = 5846.4, df = NA, p-value = 0.0004998

table(unique_df$hysterectomy, unique_df$previa)

##    
##          0      1
##   0 193709  14011
##   1   1234    601

cross_table6 <- table(unique_df$hysterectomy, unique_df$previa)
chisq.test(cross_table6, simulate.p.value = TRUE)

## 
##  Pearson's Chi-squared test with simulated p-value (based on 2000
##  replicates)
## 
## data:  cross_table6
## X-squared = 1896.6, df = NA, p-value = 0.0004998

colnames(df)

##  [1] "adladm"       "adldis"       "admpath"      "age"          "ambul"       
##  [6] "brthw"        "brthwk"       "bunketu"      "dadm"         "dcc1"        
## [11] "dcc2"         "dcc3"         "dcc4"         "dcin1"        "dcin2"       
## [16] "dcin3"        "dcin4"        "dislasts"     "disto"        "dmain"       
## [21] "dres1"        "dres2"        "dth24h"       "height"       "HospDmycd"   
## [26] "jcs"          "jcsdis"       "kacd"         "nyuseqno"     "opean1"      
## [31] "opean2"       "opean3"       "opean4"       "opean5"       "opek1"       
## [36] "opek2"        "opek3"        "opek4"        "opek5"        "outcome"     
## [41] "preg"         "pregwkadm"    "PtDmyCd"      "readmkind"    "readmrea"    
## [46] "sex"          "smokingidx"   "urgency"      "weight"       "手術1相対日" 
## [51] "手術2相対日"  "手術3相対日"  "手術4相対日"  "手術5相対日"  "入院期間"    
## [56] "hysterectomy" "tae"          "cs"           "total_tae"    "ivr_shisetsu"
## [61] "death"        "accreta"      "previa"

#6. previaとaccretaを除外したdfを作って解析
patients_with_history <- unique(df$PtDmyCd[df$previa == 1 | df$accreta == 1])
new_df <- df[!(df$PtDmyCd %in% patients_with_history), ]
unique_new_df <- new_df %>%
  group_by(PtDmyCd) %>%
  summarise(入院期間 = max(入院期間), age = max(age), bunketu=max(bunketu),cs=max(cs),death=max(death),tae=max(tae),
            ivr_shisetsu=max(ivr_shisetsu),
            hysterectomy = max(hysterectomy))
unique_new_df$cs <- factor(unique_new_df$cs)
unique_new_df$death<- factor(unique_new_df$death)
unique_new_df$tae <- factor(unique_new_df$tae)
unique_new_df$ivr_shisetsu <- factor(unique_new_df$ivr_shisetsu)
vars<-c("入院期間","age","bunketu","cs","death","tae","ivr_shisetsu")
factor_var<-c("cs","death","tae","ivr_shisetsu")

CreateTableOne(vars = vars,
               factorVars=factor_var,
               includeNA = T,
               addOverall = T,
               smd = T,
               data = unique_new_df,
               testExact = fisher.test,
               testNonNormal = kruskal.test,
               strata = "hysterectomy")

##                       Stratified by hysterectomy
##                        Overall          0                1                
##   n                     187085           186273              812          
##   入院期間 (mean (SD))   11.65 (11.99)    11.63 (11.97)    16.43 (15.31)  
##   age (mean (SD))        33.38 (5.42)     33.37 (5.42)     35.99 (5.10)   
##   bunketu (mean (SD))  1279.51 (643.83) 1273.12 (626.04) 3023.98 (1879.27)
##   cs = 1 (%)            109089 (58.3)    108819 (58.4)       270 (33.3)   
##   death = 1 (%)            101 ( 0.1)        86 ( 0.0)        15 ( 1.8)   
##   tae = 1 (%)             2240 ( 1.2)      2102 ( 1.1)       138 (17.0)   
##   ivr_shisetsu = 1 (%)  136928 (73.2)    136212 (73.1)       716 (88.2)   
##                       Stratified by hysterectomy
##                        p      test
##   n                               
##   入院期間 (mean (SD)) <0.001     
##   age (mean (SD))      <0.001     
##   bunketu (mean (SD))  <0.001     
##   cs = 1 (%)           <0.001     
##   death = 1 (%)        <0.001     
##   tae = 1 (%)          <0.001     
##   ivr_shisetsu = 1 (%) <0.001

skim(unique_new_df)

Data summary
Name	unique_new_df
Number of rows	187085
Number of columns	9
_______________________
Column type frequency:
factor	4
numeric	5
________________________
Group variables	None

Variable type: factor

skim_variable	complete_rate	ordered	n_unique	top_counts
cs	1	FALSE	2	1: 109089, 0: 77996
death	1	FALSE	2	0: 186984, 1: 101
tae	1	FALSE	2	0: 184845, 1: 2240
ivr_shisetsu	1	FALSE	2	1: 136928, 0: 50157

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
PtDmyCd	0	1.0	19358417.49	16085952.05	12	3593392	16435649	34557080	52982269	▇▃▃▃▂
入院期間	578	1.0	11.65	11.99	1	7	8	10	100	▇▁▁▁▁
age	922	1.0	33.38	5.42	16	30	34	37	60	▁▇▇▁▁
bunketu	17849	0.9	1279.51	643.83	0	1000	1190	1510	9990	▇▁▁▁▁
hysterectomy	0	1.0	0.00	0.07	0	0	0	0	1	▇▁▁▁▁

table1<-CreateTableOne(vars = vars,
                       factorVars=factor_var,
                       includeNA = T,
                       addOverall = T,
                       smd = T,
                       data = unique_new_df,
                       testExact = fisher.test,
                       testNonNormal = kruskal.test,
                       strata = "hysterectomy")
print(table1,smd = T)

##                       Stratified by hysterectomy
##                        Overall          0                1                
##   n                     187085           186273              812          
##   入院期間 (mean (SD))   11.65 (11.99)    11.63 (11.97)    16.43 (15.31)  
##   age (mean (SD))        33.38 (5.42)     33.37 (5.42)     35.99 (5.10)   
##   bunketu (mean (SD))  1279.51 (643.83) 1273.12 (626.04) 3023.98 (1879.27)
##   cs = 1 (%)            109089 (58.3)    108819 (58.4)       270 (33.3)   
##   death = 1 (%)            101 ( 0.1)        86 ( 0.0)        15 ( 1.8)   
##   tae = 1 (%)             2240 ( 1.2)      2102 ( 1.1)       138 (17.0)   
##   ivr_shisetsu = 1 (%)  136928 (73.2)    136212 (73.1)       716 (88.2)   
##                       Stratified by hysterectomy
##                        p      test SMD   
##   n                                      
##   入院期間 (mean (SD)) <0.001       0.349
##   age (mean (SD))      <0.001       0.498
##   bunketu (mean (SD))  <0.001       1.250
##   cs = 1 (%)           <0.001       0.522
##   death = 1 (%)        <0.001       0.187
##   tae = 1 (%)          <0.001       0.575
##   ivr_shisetsu = 1 (%) <0.001       0.388

#5.傾向スコアマッチング
# 傾向スコアをglmを用いて求める

mean_inpatient_duration <- mean(unique_new_df$入院期間, na.rm = TRUE)
unique_new_df$入院期間[is.na(unique_new_df$入院期間)] <- mean_inpatient_duration
mean_inpatient_duration <- mean(unique_new_df$age, na.rm = TRUE)
unique_new_df$age[is.na(unique_new_df$age)] <- mean_inpatient_duration
mean_inpatient_duration <- mean(unique_new_df$bunketu, na.rm = TRUE)
unique_new_df$bunketu[is.na(unique_new_df$bunketu)] <- mean_inpatient_duration
skim(unique_new_df)

Data summary
Name	unique_new_df
Number of rows	187085
Number of columns	9
_______________________
Column type frequency:
factor	4
numeric	5
________________________
Group variables	None

Variable type: factor

skim_variable	complete_rate	ordered	n_unique	top_counts
cs	1	FALSE	2	1: 109089, 0: 77996
death	1	FALSE	2	0: 186984, 1: 101
tae	1	FALSE	2	0: 184845, 1: 2240
ivr_shisetsu	1	FALSE	2	1: 136928, 0: 50157

Variable type: numeric

skim_variable	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
PtDmyCd	1	19358417.49	16085952.05	12	3593392	16435649	34557080	52982269	▇▃▃▃▂
入院期間	1	11.65	11.98	1	7	8	10	100	▇▁▁▁▁
age	1	33.38	5.41	16	30	34	37	60	▁▇▇▁▁
bunketu	1	1279.51	612.35	0	1020	1240	1469	9990	▇▁▁▁▁
hysterectomy	1	0.00	0.07	0	0	0	0	1	▇▁▁▁▁

propensityScoreModel <- glm(hysterectomy ~ age + ivr_shisetsu + tae, 
                            data = unique_new_df, 
                            family = binomial,
                            na.action = na.exclude)
ps<-matchit(formula=hysterectomy ~ age + ivr_shisetsu + tae, 
            data=unique_new_df,
            method="nearest",
            distance="glm",
            caliper=0.1,
            ratio=1)
dfPSMatched <- match.data(ps)
CreateTableOne(vars = vars,
               factorVars=factor_var,
               includeNA = T,
               addOverall = T,
               smd = T,
               data = dfPSMatched,
               testExact = fisher.test,
               testNonNormal = kruskal.test,
               strata = "hysterectomy")

##                       Stratified by hysterectomy
##                        Overall           0                1                
##   n                       1624               812              812          
##   入院期間 (mean (SD))   13.90 (13.55)     11.47 (11.19)    16.34 (15.17)  
##   age (mean (SD))        35.98 (5.09)      35.98 (5.09)     35.98 (5.09)   
##   bunketu (mean (SD))  2034.90 (1546.37) 1462.60 (942.73) 2607.19 (1800.25)
##   cs = 1 (%)               738 (45.4)        468 (57.6)       270 (33.3)   
##   death = 1 (%)             17 ( 1.0)          2 ( 0.2)        15 ( 1.8)   
##   tae = 1 (%)              276 (17.0)        138 (17.0)       138 (17.0)   
##   ivr_shisetsu = 1 (%)    1432 (88.2)        716 (88.2)       716 (88.2)   
##                       Stratified by hysterectomy
##                        p      test
##   n                               
##   入院期間 (mean (SD)) <0.001     
##   age (mean (SD))       1.000     
##   bunketu (mean (SD))  <0.001     
##   cs = 1 (%)           <0.001     
##   death = 1 (%)         0.003     
##   tae = 1 (%)           1.000     
##   ivr_shisetsu = 1 (%)  1.000

table2<-CreateTableOne(vars = vars,
               factorVars=factor_var,
               includeNA = T,
               addOverall = T,
               smd = T,
               data = dfPSMatched,
               testExact = fisher.test,
               testNonNormal = kruskal.test,
               strata = "hysterectomy")
print(table2,smd = T)

##                       Stratified by hysterectomy
##                        Overall           0                1                
##   n                       1624               812              812          
##   入院期間 (mean (SD))   13.90 (13.55)     11.47 (11.19)    16.34 (15.17)  
##   age (mean (SD))        35.98 (5.09)      35.98 (5.09)     35.98 (5.09)   
##   bunketu (mean (SD))  2034.90 (1546.37) 1462.60 (942.73) 2607.19 (1800.25)
##   cs = 1 (%)               738 (45.4)        468 (57.6)       270 (33.3)   
##   death = 1 (%)             17 ( 1.0)          2 ( 0.2)        15 ( 1.8)   
##   tae = 1 (%)              276 (17.0)        138 (17.0)       138 (17.0)   
##   ivr_shisetsu = 1 (%)    1432 (88.2)        716 (88.2)       716 (88.2)   
##                       Stratified by hysterectomy
##                        p      test SMD   
##   n                                      
##   入院期間 (mean (SD)) <0.001       0.365
##   age (mean (SD))       1.000      <0.001
##   bunketu (mean (SD))  <0.001       0.797
##   cs = 1 (%)           <0.001       0.505
##   death = 1 (%)         0.003       0.158
##   tae = 1 (%)           1.000      <0.001
##   ivr_shisetsu = 1 (%)  1.000      <0.001

#今後の修正点
#欠測補完をとりあえず適当に平均代入法で行っているためこれを修正すること

Untitled

2024-04-04

R Markdown