# Cancer incidence data are stored in card numbers 12-17.
setwd("~/Dropbox/JACC_study/thyroid_cancer")
# Load the three .Rdata files in order. `envir = environment()` is evaluated
# at the call site, so the objects are created in the same environment that
# direct top-level load() calls would use (not inside lapply's frame).
invisible(lapply(
  c(
    "~/Dropbox/JACC_study/thyroid_cancer/Call12_13.Rdata",
    "~/Dropbox/JACC_study/thyroid_cancer/Call14_15.Rdata",
    "~/Dropbox/JACC_study/thyroid_cancer/Call16_17.Rdata"
  ),
  load,
  envir = environment()
))
library(epicalc)
## Loading required package: foreign
## Loading required package: survival
## Loading required package: MASS
## Loading required package: nnet
# Four diagnosis fields are tabulated in turn; this is the first one.
# `[[` extracts the column by its exact name (the surrounding spaces are
# part of the column name).
table(Call12_13[[" ICD10i_1 "]])
##
## C004 C008 C021 C022 C028 C029 C030 C031 C039
## 120866 2 1 6 1 1 21 6 5 4
## C049 C050 C051 C058 C059 C060 C069 C07 C079 C080
## 4 1 3 1 1 2 5 6 1 4
## C081 C089 C103 C109 C119 C12 C132 C138 C139 C140
## 2 1 1 3 5 3 1 2 7 3
## C150 C151 C152 C153 C154 C155 C158 C159 C160 C161
## 1 11 5 2 8 15 3 71 66 23
## C162 C163 C164 C165 C166 C168 C169 C170 C171 C172
## 289 243 24 7 3 32 800 14 1 2
## C179 C180 C181 C182 C183 C184 C185 C186 C187 C188
## 4 68 7 147 3 84 3 40 241 4
## C189 C19 C196 C199 C20 C209 C210 C218 C220 C221
## 109 28 1 1 328 5 3 5 194 13
## C223 C227 C229 C23 C239 C240 C241 C249 C25 C250
## 1 28 168 79 3 87 22 5 2 72
## C251 C252 C254 C258 C259 C269 C301 C310 C311 C318
## 17 14 1 5 121 3 1 12 1 1
## C319 C320 C322 C329 C33 C340 C341 C342 C343 C348
## 1 14 1 30 3 13 143 8 82 4
## C349 C37 C383 C384 C400 C402 C412 C414 C433 C436
## 439 8 4 2 1 2 3 2 1 4
## C437 C438 C439 C440 C441 C442 C443 C444 C445 C446
## 1 1 7 2 5 6 30 4 2 8
## C447 C448 C449 C450 C451 C467 C480 C481 C482 C490
## 4 1 10 2 1 1 3 1 2 2
## C491 C492 C494 C495 C496 C499 C50 C500 C501 C502
## 1 3 3 3 1 3 1 3 5 21
## C503 C504 C505 C508 C509 C519 C52 C53 C531 C539
## 1 35 8 20 211 2 3 1 1 58
## C540 C541 C542 C549 C55 C56 C570 C579 C600 C609
## 1 19 7 52 15 52 1 1 1 2
## C61 C619 C621 C629 C631 C64 C649 C65 C66 C670
## 301 19 3 4 2 71 4 21 12 1
## C672 C673 C674 C675 C678 C679 C680 C688 C700 C710
## 4 2 2 2 8 151 3 1 6 2
## C711 C712 C713 C714 C719 C724 C729 C73 C739 C741
## 3 4 1 1 15 1 2 125 1 1
## C749 C751 C752 C760 C761 C762 C767 C770 C772 C774
## 1 1 1 3 1 1 4 1 2 1
## C779 C780 C786 C787 C793 C795 C80 C811 C812 C813
## 3 1 3 2 1 1 69 1 1 1
## C817 C819 C820 C821 C822 C829 C831 C832 C833 C838
## 1 4 1 8 1 1 1 3 20 1
## C839 C844 C845 C850 C857 C859 C880 C900 C902 C910
## 1 2 1 2 1 73 1 44 1 7
## C911 C915 C919 C920 C921 C924 C925 C930 C939 C947
## 5 5 2 28 11 1 3 1 1 2
## C950 C957 C959 C961 D000 D001 D002 D010 D011 D012
## 1 1 1 2 1 6 33 22 2 10
## D020 D022 D041 D044 D045 D046 D047 D049 D051 D057
## 2 1 1 1 1 3 8 4 1 2
## D059 D069 D071 D074 D090 D091 D11 D110 D122 D123
## 5 26 2 1 1 2 1 1 1 1
## D128 D130 D143 D150 D219 D231 D260 D27 D320 D352
## 1 1 1 1 1 1 1 4 1 3
## D371 D372 D374 D375 D377 D381 D391 D400 D414 D432
## 3 1 1 1 2 1 2 2 1 2
## D441 D45 D460 D469
## 1 1 1 12
# Frequency table of the second diagnosis field.
table(Call14_15[[" ICD10i_2 "]])
##
## C000 C01 C030 C031 C069 C07 C139 C151 C154
## 126784 1 1 1 1 1 1 1 1 1
## C159 C16 C160 C161 C162 C163 C166 C168 C169 C170
## 6 1 4 2 22 12 1 1 25 2
## C180 C181 C182 C184 C186 C187 C188 C189 C19 C20
## 4 2 4 6 4 10 1 6 1 23
## C210 C220 C221 C227 C229 C23 C240 C241 C250 C252
## 1 10 1 2 9 4 4 1 8 2
## C259 C269 C310 C311 C320 C329 C341 C343 C349 C37
## 6 1 1 1 1 2 6 11 28 1
## C436 C437 C443 C444 C447 C449 C481 C493 C494 C495
## 2 1 3 1 1 2 1 1 1 1
## C504 C505 C509 C53 C539 C541 C542 C549 C55 C56
## 2 1 7 1 4 1 1 6 2 2
## C61 C64 C65 C66 C672 C673 C675 C679 C711 C719
## 22 7 4 4 2 1 1 14 1 2
## C73 C773 C779 C780 C782 C786 C787 C788 C792 C793
## 4 1 1 3 1 2 4 3 1 1
## C794 C795 C798 C80 C829 C830 C833 C845 C851 C859
## 1 4 1 11 1 1 3 1 1 6
## C900 C920 C921 C925 C931 C961 D000 D001 D002 D010
## 2 3 1 1 1 1 1 1 3 2
## D012 D090 D123 D125 D126 D128 D469
## 3 2 2 1 1 1 2
# Frequency table of the third diagnosis field.
table(Call14_15[[" ICD10i_3 "]])
##
## C159 C162 C163 C165 C169 C180 C182 C184 C187
## 127167 1 1 1 1 2 1 2 1 2
## C189 C220 C23 C259 C343 C349 C61 C65 C679 C73
## 1 1 1 2 1 1 3 2 2 1
## C780 C782 C787 C793 C795 C833 C859 D010 D120 D143
## 2 1 3 1 2 1 1 1 1 1
# Frequency table of the fourth diagnosis field.
table(Call16_17[[" ICD10i_4 "]])
##
## C61 C679 C786 C80
## 127204 1 1 1 1
# Combine the three card data frames into a single data frame by joining
# them successively (left to right) on the shared key column "ID_wang".
diagnosed <- Reduce(
  function(left, right) merge(left, right, by = "ID_wang"),
  list(Call12_13, Call14_15, Call16_17)
)
# Breast cancer: ICD-10 codes beginning with "C50".
# grep() returns the ROW POSITIONS in `diagnosed` of the matching entries
# (not ID_wang values); one vector per diagnosis field.
Breast_cancer <- grep("^C50[0-9]?", diagnosed$' ICD10i_1 ')
Breast_cancer1 <- grep("^C50[0-9]?", diagnosed$' ICD10i_2 ')
Breast_cancer2 <- grep("^C50[0-9]?", diagnosed$' ICD10i_3 ') # recorded run: 0 people
Breast_cancer3 <- grep("^C50[0-9]?", diagnosed$' ICD10i_4 ') # recorded run: 0 people

# Flag every row that has a breast cancer code in any of the four fields.
# Bug fix: the previous loops compared the row positions returned by grep()
# against ID_wang values (`diagnosed$ID_wang == i`), which marks the wrong
# rows unless ID_wang happens to equal the row number. The positions are now
# used directly as row indices. The third and fourth fields, which were
# computed but never applied, are included as well.
diagnosed$Bre_Can <- rep(FALSE, nrow(diagnosed))
diagnosed$Bre_Can[c(
  Breast_cancer, Breast_cancer1, Breast_cancer2, Breast_cancer3
)] <- TRUE

# The recorded output of the original run reported n = 315 breast cancer
# cases; re-check this count after the indexing fix.
tab1(diagnosed$Bre_Can)
## diagnosed$Bre_Can :
## Frequency Percent Cum. percent
## FALSE 126893 99.8 99.8
## TRUE 315 0.2 100.0
## Total 127208 100.0 100.0
# From here on: check histological type / behaviour codes.
# Keep the " ICD2nd1 " value for rows flagged as breast cancer and set all
# other rows to NA, then tabulate the result without drawing a graph.
seijyo <- ifelse(diagnosed$Bre_Can, diagnosed[[" ICD2nd1 "]], NA)
tab1(seijyo, graph = FALSE)
## seijyo :
## Frequency %(NA+) %(NA-)
## 18 0.0 5.7
## 160821139 1 0.0 0.3
## 16280003 1 0.0 0.3
## 16380003 1 0.0 0.3
## 16980003 1 0.0 0.3
## 169821132 1 0.0 0.3
## 20980003 1 0.0 0.3
## 2323004 2 0.0 0.6
## 2324099 1 0.0 0.3
## 2410662 1 0.0 0.3
## 2414314 1 0.0 0.3
## 2417967 2 0.0 0.6
## 36586 3 0.0 1.0
## 50080003 2 0.0 0.6
## 50085403 1 0.0 0.3
## 50180003 1 0.0 0.3
## 50185003 2 0.0 0.6
## 50280003 5 0.0 1.6
## 502814139 2 0.0 0.6
## 502826039 1 0.0 0.3
## 502848039 1 0.0 0.3
## 50285003 3 0.0 1.0
## 502850032 2 0.0 0.6
## 502850039 1 0.0 0.3
## 502852033 1 0.0 0.3
## 50385003 1 0.0 0.3
## 50480003 2 0.0 0.6
## 50480503 1 0.0 0.3
## 504814139 1 0.0 0.3
## 504826039 1 0.0 0.3
## 50485003 13 0.0 4.1
## 504850031 1 0.0 0.3
## 504850033 2 0.0 0.6
## 504850039 3 0.0 1.0
## 504852039 1 0.0 0.3
## 50580003 1 0.0 0.3
## 505814139 2 0.0 0.6
## 50585003 3 0.0 1.0
## 505852039 1 0.0 0.3
## 50880003 3 0.0 1.0
## 508800039 1 0.0 0.3
## 50880503 1 0.0 0.3
## 508814039 1 0.0 0.3
## 508814139 2 0.0 0.6
## 50885003 1 0.0 0.3
## 508850033 2 0.0 0.6
## 508850039 2 0.0 0.6
## 508852039 2 0.0 0.6
## 50980003 53 0.0 16.8
## 509800039 9 0.0 2.9
## 50980103 1 0.0 0.3
## 509801039 1 0.0 0.3
## 509807039 1 0.0 0.3
## 50981403 1 0.0 0.3
## 509814039 8 0.0 2.5
## 50981413 20 0.0 6.3
## 509814130 1 0.0 0.3
## 509814132 1 0.0 0.3
## 509814139 6 0.0 1.9
## 509821139 1 0.0 0.3
## 509823039 1 0.0 0.3
## 50982603 3 0.0 1.0
## 509826039 2 0.0 0.6
## 50982633 3 0.0 1.0
## 50984013 2 0.0 0.6
## 50984803 1 0.0 0.3
## 509848039 2 0.0 0.6
## 50985003 29 0.0 9.2
## 509850031 4 0.0 1.3
## 509850032 4 0.0 1.3
## 509850033 3 0.0 1.0
## 509850039 26 0.0 8.3
## 50985033 3 0.0 1.0
## 50985103 8 0.0 2.5
## 509851032 1 0.0 0.3
## 50985203 4 0.0 1.3
## 509852032 1 0.0 0.3
## 509852139 3 0.0 1.0
## 509853039 1 0.0 0.3
## 50985733 1 0.0 0.3
## 50988103 1 0.0 0.3
## 54981403 1 0.0 0.3
## 67812033 1 0.0 0.3
## 739800039 1 0.0 0.3
## 739814039 1 0.0 0.3
## 81403 1 0.0 0.3
## <NA> 126893 99.8 0.0
## Total 127208 100.0 100.0
# 50からスタートの乳がん原発ですので,それらの組織型のデータしか使えない.
# JACC研究は,ICD-o-2の組織型コードを使う
# 参考リンク:
# 1. ICD-O-2とICD-O-3の対応
# 2. 乳がんの組織型分類P9