# Cancer incidence data are stored in card numbers 12-17.
setwd("~/Dropbox/JACC_study/thyroid_cancer")

# Load each saved .Rdata file into the current environment.
# NOTE(review): presumably these files provide the Call12_13, Call14_15 and
# Call16_17 data frames used below -- confirm against the saved files.
for (rdata_path in c("~/Dropbox/JACC_study/thyroid_cancer/Call12_13.Rdata",
                     "~/Dropbox/JACC_study/thyroid_cancer/Call14_15.Rdata",
                     "~/Dropbox/JACC_study/thyroid_cancer/Call16_17.Rdata")) {
  load(rdata_path)
}
rm(rdata_path)  # do not leave the loop variable behind in the workspace

# epicalc supplies tab1(), which is used for the frequency tables below.
library(epicalc)
## Loading required package: foreign
## Loading required package: survival
## Loading required package: MASS
## Loading required package: nnet
# Four diagnosis fields (ICD10i_1 .. ICD10i_4); frequency table of the first one.
# The column name contains a leading and a trailing space.
table(Call12_13[[" ICD10i_1 "]])
## 
##         C004   C008   C021   C022   C028   C029   C030   C031   C039  
## 120866      2      1      6      1      1     21      6      5      4 
##  C049   C050   C051   C058   C059   C060   C069    C07   C079   C080  
##      4      1      3      1      1      2      5      6      1      4 
##  C081   C089   C103   C109   C119    C12   C132   C138   C139   C140  
##      2      1      1      3      5      3      1      2      7      3 
##  C150   C151   C152   C153   C154   C155   C158   C159   C160   C161  
##      1     11      5      2      8     15      3     71     66     23 
##  C162   C163   C164   C165   C166   C168   C169   C170   C171   C172  
##    289    243     24      7      3     32    800     14      1      2 
##  C179   C180   C181   C182   C183   C184   C185   C186   C187   C188  
##      4     68      7    147      3     84      3     40    241      4 
##  C189    C19   C196   C199    C20   C209   C210   C218   C220   C221  
##    109     28      1      1    328      5      3      5    194     13 
##  C223   C227   C229    C23   C239   C240   C241   C249    C25   C250  
##      1     28    168     79      3     87     22      5      2     72 
##  C251   C252   C254   C258   C259   C269   C301   C310   C311   C318  
##     17     14      1      5    121      3      1     12      1      1 
##  C319   C320   C322   C329    C33   C340   C341   C342   C343   C348  
##      1     14      1     30      3     13    143      8     82      4 
##  C349    C37   C383   C384   C400   C402   C412   C414   C433   C436  
##    439      8      4      2      1      2      3      2      1      4 
##  C437   C438   C439   C440   C441   C442   C443   C444   C445   C446  
##      1      1      7      2      5      6     30      4      2      8 
##  C447   C448   C449   C450   C451   C467   C480   C481   C482   C490  
##      4      1     10      2      1      1      3      1      2      2 
##  C491   C492   C494   C495   C496   C499    C50   C500   C501   C502  
##      1      3      3      3      1      3      1      3      5     21 
##  C503   C504   C505   C508   C509   C519    C52    C53   C531   C539  
##      1     35      8     20    211      2      3      1      1     58 
##  C540   C541   C542   C549    C55    C56   C570   C579   C600   C609  
##      1     19      7     52     15     52      1      1      1      2 
##   C61   C619   C621   C629   C631    C64   C649    C65    C66   C670  
##    301     19      3      4      2     71      4     21     12      1 
##  C672   C673   C674   C675   C678   C679   C680   C688   C700   C710  
##      4      2      2      2      8    151      3      1      6      2 
##  C711   C712   C713   C714   C719   C724   C729    C73   C739   C741  
##      3      4      1      1     15      1      2    125      1      1 
##  C749   C751   C752   C760   C761   C762   C767   C770   C772   C774  
##      1      1      1      3      1      1      4      1      2      1 
##  C779   C780   C786   C787   C793   C795    C80   C811   C812   C813  
##      3      1      3      2      1      1     69      1      1      1 
##  C817   C819   C820   C821   C822   C829   C831   C832   C833   C838  
##      1      4      1      8      1      1      1      3     20      1 
##  C839   C844   C845   C850   C857   C859   C880   C900   C902   C910  
##      1      2      1      2      1     73      1     44      1      7 
##  C911   C915   C919   C920   C921   C924   C925   C930   C939   C947  
##      5      5      2     28     11      1      3      1      1      2 
##  C950   C957   C959   C961   D000   D001   D002   D010   D011   D012  
##      1      1      1      2      1      6     33     22      2     10 
##  D020   D022   D041   D044   D045   D046   D047   D049   D051   D057  
##      2      1      1      1      1      3      8      4      1      2 
##  D059   D069   D071   D074   D090   D091    D11   D110   D122   D123  
##      5     26      2      1      1      2      1      1      1      1 
##  D128   D130   D143   D150   D219   D231   D260    D27   D320   D352  
##      1      1      1      1      1      1      1      4      1      3 
##  D371   D372   D374   D375   D377   D381   D391   D400   D414   D432  
##      3      1      1      1      2      1      2      2      1      2 
##  D441    D45   D460   D469  
##      1      1      1     12
# Frequency table of the second diagnosis field.
table(Call14_15[[" ICD10i_2 "]])
## 
##         C000    C01   C030   C031   C069    C07   C139   C151   C154  
## 126784      1      1      1      1      1      1      1      1      1 
##  C159    C16   C160   C161   C162   C163   C166   C168   C169   C170  
##      6      1      4      2     22     12      1      1     25      2 
##  C180   C181   C182   C184   C186   C187   C188   C189    C19    C20  
##      4      2      4      6      4     10      1      6      1     23 
##  C210   C220   C221   C227   C229    C23   C240   C241   C250   C252  
##      1     10      1      2      9      4      4      1      8      2 
##  C259   C269   C310   C311   C320   C329   C341   C343   C349    C37  
##      6      1      1      1      1      2      6     11     28      1 
##  C436   C437   C443   C444   C447   C449   C481   C493   C494   C495  
##      2      1      3      1      1      2      1      1      1      1 
##  C504   C505   C509    C53   C539   C541   C542   C549    C55    C56  
##      2      1      7      1      4      1      1      6      2      2 
##   C61    C64    C65    C66   C672   C673   C675   C679   C711   C719  
##     22      7      4      4      2      1      1     14      1      2 
##   C73   C773   C779   C780   C782   C786   C787   C788   C792   C793  
##      4      1      1      3      1      2      4      3      1      1 
##  C794   C795   C798    C80   C829   C830   C833   C845   C851   C859  
##      1      4      1     11      1      1      3      1      1      6 
##  C900   C920   C921   C925   C931   C961   D000   D001   D002   D010  
##      2      3      1      1      1      1      1      1      3      2 
##  D012   D090   D123   D125   D126   D128   D469  
##      3      2      2      1      1      1      2
# Frequency table of the third diagnosis field.
table(Call14_15[[" ICD10i_3 "]])
## 
##         C159   C162   C163   C165   C169   C180   C182   C184   C187  
## 127167      1      1      1      1      2      1      2      1      2 
##  C189   C220    C23   C259   C343   C349    C61    C65   C679    C73  
##      1      1      1      2      1      1      3      2      2      1 
##  C780   C782   C787   C793   C795   C833   C859   D010   D120   D143  
##      2      1      3      1      2      1      1      1      1      1
# Frequency table of the fourth diagnosis field.
table(Call16_17[[" ICD10i_4 "]])
## 
##          C61   C679   C786    C80  
## 127204      1      1      1      1
# Combine the three card data frames into a single data frame.
# The frames are folded left to right with merge() on the shared key ID_wang;
# merge() defaults to all = FALSE, so only IDs present in every frame are kept.
diagnosed <- Reduce(
  function(left, right) merge(left, right, by = "ID_wang"),
  list(Call12_13, Call14_15, Call16_17)
)

# Flag breast cancer cases: ICD-10 codes starting with C50 (C50, C500-C509).
# grep() returns ROW POSITIONS within `diagnosed`, not ID_wang values, so the
# flag has to be set by row position. Comparing these positions against
# diagnosed$ID_wang is only correct when every ID happens to equal its row
# number after merge(), which is not guaranteed.
Breast_cancer  <- grep("^C50[0-9]?", diagnosed$' ICD10i_1 ')
Breast_cancer1 <- grep("^C50[0-9]?", diagnosed$' ICD10i_2 ')
Breast_cancer2 <- grep("^C50[0-9]?", diagnosed$' ICD10i_3 ') # 0 matches in the original run
Breast_cancer3 <- grep("^C50[0-9]?", diagnosed$' ICD10i_4 ') # 0 matches in the original run

# Start with nobody flagged, then mark every row that matched in any of the
# four diagnosis fields. Empty index vectors are harmless no-ops, so the third
# and fourth fields are included for completeness.
diagnosed$Bre_Can <- rep(FALSE, nrow(diagnosed))
breast_rows <- unique(c(Breast_cancer, Breast_cancer1, Breast_cancer2, Breast_cancer3))
diagnosed$Bre_Can[breast_rows] <- TRUE


# Frequency table of the flag; the original run reported n = 315 breast cancer cases.
tab1(diagnosed$Bre_Can)

## diagnosed$Bre_Can : 
##         Frequency Percent Cum. percent
## FALSE      126893    99.8         99.8
## TRUE          315     0.2        100.0
##   Total    127208   100.0        100.0
# From here on: check histology type / behaviour codes.
# Keep the " ICD2nd1 " value for rows flagged as breast cancer and NA otherwise.
# The variable name is kept because tab1() prints it as the table title.
seijyo <- ifelse(diagnosed$Bre_Can, diagnosed[[" ICD2nd1 "]], NA)
tab1(seijyo, graph = FALSE)
## seijyo : 
##            Frequency   %(NA+)   %(NA-)
##                   18      0.0      5.7
## 160821139          1      0.0      0.3
## 16280003           1      0.0      0.3
## 16380003           1      0.0      0.3
## 16980003           1      0.0      0.3
## 169821132          1      0.0      0.3
## 20980003           1      0.0      0.3
## 2323004            2      0.0      0.6
## 2324099            1      0.0      0.3
## 2410662            1      0.0      0.3
## 2414314            1      0.0      0.3
## 2417967            2      0.0      0.6
## 36586              3      0.0      1.0
## 50080003           2      0.0      0.6
## 50085403           1      0.0      0.3
## 50180003           1      0.0      0.3
## 50185003           2      0.0      0.6
## 50280003           5      0.0      1.6
## 502814139          2      0.0      0.6
## 502826039          1      0.0      0.3
## 502848039          1      0.0      0.3
## 50285003           3      0.0      1.0
## 502850032          2      0.0      0.6
## 502850039          1      0.0      0.3
## 502852033          1      0.0      0.3
## 50385003           1      0.0      0.3
## 50480003           2      0.0      0.6
## 50480503           1      0.0      0.3
## 504814139          1      0.0      0.3
## 504826039          1      0.0      0.3
## 50485003          13      0.0      4.1
## 504850031          1      0.0      0.3
## 504850033          2      0.0      0.6
## 504850039          3      0.0      1.0
## 504852039          1      0.0      0.3
## 50580003           1      0.0      0.3
## 505814139          2      0.0      0.6
## 50585003           3      0.0      1.0
## 505852039          1      0.0      0.3
## 50880003           3      0.0      1.0
## 508800039          1      0.0      0.3
## 50880503           1      0.0      0.3
## 508814039          1      0.0      0.3
## 508814139          2      0.0      0.6
## 50885003           1      0.0      0.3
## 508850033          2      0.0      0.6
## 508850039          2      0.0      0.6
## 508852039          2      0.0      0.6
## 50980003          53      0.0     16.8
## 509800039          9      0.0      2.9
## 50980103           1      0.0      0.3
## 509801039          1      0.0      0.3
## 509807039          1      0.0      0.3
## 50981403           1      0.0      0.3
## 509814039          8      0.0      2.5
## 50981413          20      0.0      6.3
## 509814130          1      0.0      0.3
## 509814132          1      0.0      0.3
## 509814139          6      0.0      1.9
## 509821139          1      0.0      0.3
## 509823039          1      0.0      0.3
## 50982603           3      0.0      1.0
## 509826039          2      0.0      0.6
## 50982633           3      0.0      1.0
## 50984013           2      0.0      0.6
## 50984803           1      0.0      0.3
## 509848039          2      0.0      0.6
## 50985003          29      0.0      9.2
## 509850031          4      0.0      1.3
## 509850032          4      0.0      1.3
## 509850033          3      0.0      1.0
## 509850039         26      0.0      8.3
## 50985033           3      0.0      1.0
## 50985103           8      0.0      2.5
## 509851032          1      0.0      0.3
## 50985203           4      0.0      1.3
## 509852032          1      0.0      0.3
## 509852139          3      0.0      1.0
## 509853039          1      0.0      0.3
## 50985733           1      0.0      0.3
## 50988103           1      0.0      0.3
## 54981403           1      0.0      0.3
## 67812033           1      0.0      0.3
## 739800039          1      0.0      0.3
## 739814039          1      0.0      0.3
## 81403              1      0.0      0.3
## <NA>          126893     99.8      0.0
##   Total       127208    100.0    100.0
# 50で始まるコードが乳がん原発なので,それらの組織型データのみ使用できる.
# JACC研究では,ICD-O-2の組織型コードを使う

# 参考リンク:
# 1. ICD-O-2とICD-O-3の対応
# 2. 乳がんの組織型分類 P9