library(TCGAbiolinks)
library(dplyr)
# Build a GDC query for gene expression quantification files of the
# TCGA-COAD and TCGA-READ projects produced by the "HTSeq - FPKM-UQ" workflow.
# NOTE(review): newer GDC data releases may no longer expose this workflow
# name -- confirm against the TCGAbiolinks/GDC version in use.
query.exp <- GDCquery(
  project = c("TCGA-COAD", "TCGA-READ"),
  data.category = "Transcriptome Profiling",
  data.type = "Gene Expression Quantification",
  workflow.type = "HTSeq - FPKM-UQ"
)

# Keep only the query results where `is_ffpe` is false and `sample_type`
# is "Primary Tumor".
exp.tab <- dplyr::filter(
  getResults(query.exp),
  !is_ffpe & sample_type == "Primary Tumor"
)

# Sample-level submitter IDs of the retained expression entries.
exp.samples <- pull(exp.tab, sample.submitter_id)
# Build a GDC query for DNA methylation files of the TCGA-COAD and TCGA-READ
# projects measured on the "Illumina Human Methylation 450" platform.
query.met <- GDCquery(
  project = c("TCGA-COAD", "TCGA-READ"),
  data.category = "DNA Methylation",
  platform = "Illumina Human Methylation 450"
)

# Keep only the query results where `is_ffpe` is false and `sample_type`
# is "Primary Tumor".
met.tab <- dplyr::filter(
  getResults(query.met),
  !is_ffpe & sample_type == "Primary Tumor"
)

# Sample-level submitter IDs of the retained methylation entries.
met.samples <- pull(met.tab, sample.submitter_id)
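## [1] 631  (presumably length(exp.samples); the producing call is not shown here)
## [1] 399  (presumably length(met.samples); the producing call is not shown here)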
# Sample IDs present in both the expression and the methylation sample
# lists, followed by how many such samples there are.
exp.met.samples <- intersect(
  x = exp.samples,
  y = met.samples
)
length(exp.met.samples)
## [1] 392
# Clinical table for the TCGA-COAD project.
clin.coad <- GDCquery_clinic("TCGA-COAD")

# Distinct `ajcc_pathologic_stage` labels matching the pattern "II|III"
# (i.e. every label containing "II"), sorted.
stages_II_III <- sort(
  grep("II|III", unique(clin.coad$ajcc_pathologic_stage), value = TRUE)
)
stages_II_III
## [1] "Stage II" "Stage IIA" "Stage IIB" "Stage IIC" "Stage III"
## [6] "Stage IIIA" "Stage IIIB" "Stage IIIC"

# Submitter IDs (as character) of COAD clinical records whose vital status is
# "Alive" or "Dead" and whose pathologic stage is one of the labels above.
coad.samples.stage_II_III <- clin.coad %>%
  dplyr::filter(
    vital_status %in% c("Alive", "Dead"),
    ajcc_pathologic_stage %in% stages_II_III
  ) %>%
  pull(submitter_id) %>%
  as.character()
# Clinical table for the TCGA-READ project.
clin.read <- GDCquery_clinic("TCGA-READ")

# Recompute the stage labels from the READ clinical table: distinct
# `ajcc_pathologic_stage` values matching the pattern "II|III", sorted.
# This overwrites the `stages_II_III` value derived from the COAD table.
stages_II_III <- sort(
  grep("II|III", unique(clin.read$ajcc_pathologic_stage), value = TRUE)
)
stages_II_III
## [1] "Stage II" "Stage IIA" "Stage IIB" "Stage IIC" "Stage III"
## [6] "Stage IIIA" "Stage IIIB" "Stage IIIC"

# Submitter IDs (as character) of READ clinical records whose vital status is
# "Alive" or "Dead" and whose pathologic stage is one of the labels above.
read.samples.stage_II_III <- clin.read %>%
  dplyr::filter(
    vital_status %in% c("Alive", "Dead"),
    ajcc_pathologic_stage %in% stages_II_III
  ) %>%
  pull(submitter_id) %>%
  as.character()
# Truncate each sample ID to its first 12 characters so it can be compared
# with the clinical `submitter_id` values, then keep the IDs that also appear
# in the combined COAD + READ stage II/III lists and report the count.
exp.met.samples.stage_II_and_III.with.survival <- intersect(
  x = substr(exp.met.samples, start = 1, stop = 12),
  y = c(coad.samples.stage_II_III, read.samples.stage_II_III)
)
length(exp.met.samples.stage_II_and_III.with.survival)
## [1] 261
# TCGA-READ-COAD stage II and III primary tumor samples with DNA methylation, gene expression, and survival information.
## [1] 261
## [1] "TCGA-AZ-6601" "TCGA-A6-6137" "TCGA-CM-5860" "TCGA-F4-6461" "TCGA-D5-6927"
## [6] "TCGA-CK-5913" "TCGA-AZ-6605" "TCGA-A6-6780" "TCGA-NH-A6GB" "TCGA-AY-6386"
## [11] "TCGA-CA-5796" "TCGA-A6-5660" "TCGA-D5-6926" "TCGA-CM-6679" "TCGA-G4-6309"
## [16] "TCGA-G4-6586" "TCGA-NH-A8F7" "TCGA-D5-6932" "TCGA-D5-6931" "TCGA-G4-6311"
## [21] "TCGA-D5-6535" "TCGA-F4-6703" "TCGA-A6-2675" "TCGA-DM-A28A" "TCGA-DM-A1HB"
## [26] "TCGA-G4-6310" "TCGA-D5-6531" "TCGA-CM-6164" "TCGA-F4-6854" "TCGA-A6-6781"
## [31] "TCGA-D5-6928" "TCGA-AA-3712" "TCGA-A6-5661" "TCGA-A6-5657" "TCGA-G4-6302"
## [36] "TCGA-QG-A5YV" "TCGA-CM-4752" "TCGA-AA-3496" "TCGA-NH-A6GA" "TCGA-A6-3809"
## [41] "TCGA-CK-6747" "TCGA-AY-6197" "TCGA-A6-2677" "TCGA-AD-A5EJ" "TCGA-AA-3492"
## [46] "TCGA-4T-AA8H" "TCGA-F4-6855" "TCGA-RU-A8FL" "TCGA-D5-6532" "TCGA-A6-6140"
## [51] "TCGA-A6-4105" "TCGA-A6-A5ZU" "TCGA-QG-A5YW" "TCGA-D5-6920" "TCGA-NH-A50T"
## [56] "TCGA-D5-6529" "TCGA-G4-6317" "TCGA-CM-6165" "TCGA-F4-6807" "TCGA-DM-A1D4"
## [61] "TCGA-G4-6307" "TCGA-G4-6299" "TCGA-DM-A28F" "TCGA-A6-6651" "TCGA-D5-5540"
## [66] "TCGA-DM-A280" "TCGA-NH-A5IV" "TCGA-CM-6168" "TCGA-G4-6626" "TCGA-DM-A1DA"
## [71] "TCGA-A6-A566" "TCGA-CM-5863" "TCGA-AA-3660" "TCGA-CA-6716" "TCGA-CK-6746"
## [76] "TCGA-CA-6715" "TCGA-DM-A288" "TCGA-D5-6534" "TCGA-D5-5539" "TCGA-AU-3779"
## [81] "TCGA-A6-6649" "TCGA-AA-3663" "TCGA-CM-5861" "TCGA-F4-6463" "TCGA-CM-4743"
## [86] "TCGA-D5-6541" "TCGA-A6-2686" "TCGA-T9-A92H" "TCGA-A6-2685" "TCGA-A6-A56B"
## [91] "TCGA-A6-4107" "TCGA-G4-6588" "TCGA-CA-5797" "TCGA-DM-A282" "TCGA-CM-5348"
## [96] "TCGA-G4-6298" "TCGA-AA-3489" "TCGA-4N-A93T" "TCGA-CA-6719" "TCGA-AA-3655"
## [101] "TCGA-G4-6627" "TCGA-CK-4947" "TCGA-CA-6718" "TCGA-F4-6704" "TCGA-AD-6899"
## [106] "TCGA-CM-5341" "TCGA-D5-6537" "TCGA-F4-6570" "TCGA-5M-AATE" "TCGA-A6-2679"
## [111] "TCGA-G4-6295" "TCGA-DM-A28M" "TCGA-CM-4750" "TCGA-WS-AB45" "TCGA-AA-3697"
## [116] "TCGA-A6-6650" "TCGA-AZ-5403" "TCGA-CK-4948" "TCGA-CK-4950" "TCGA-D5-6536"
## [121] "TCGA-AD-6888" "TCGA-F4-6805" "TCGA-DM-A1D6" "TCGA-DM-A1D9" "TCGA-G4-6321"
## [126] "TCGA-CK-4951" "TCGA-CM-6169" "TCGA-DM-A1HA" "TCGA-A6-5665" "TCGA-DM-A1D0"
## [131] "TCGA-A6-A565" "TCGA-AA-3510" "TCGA-CA-6717" "TCGA-D5-6538" "TCGA-DM-A28H"
## [136] "TCGA-A6-6782" "TCGA-CM-4748" "TCGA-G4-6320" "TCGA-CM-5349" "TCGA-CM-4751"
## [141] "TCGA-SS-A7HO" "TCGA-AA-3509" "TCGA-D5-5541" "TCGA-A6-6654" "TCGA-A6-2681"
## [146] "TCGA-AA-3511" "TCGA-CM-6172" "TCGA-DM-A28G" "TCGA-AZ-6608" "TCGA-QG-A5Z1"
## [151] "TCGA-CA-5254" "TCGA-AD-6895" "TCGA-D5-6922" "TCGA-F4-6459" "TCGA-DM-A0XF"
## [156] "TCGA-CM-6677" "TCGA-DM-A28E" "TCGA-NH-A50V" "TCGA-A6-6141" "TCGA-CA-5256"
## [161] "TCGA-G4-6304" "TCGA-CM-6674" "TCGA-D5-6930" "TCGA-AZ-4308" "TCGA-A6-2680"
## [166] "TCGA-CM-6162" "TCGA-QG-A5YX" "TCGA-AM-5821" "TCGA-DM-A0XD" "TCGA-A6-5667"
## [171] "TCGA-CM-5344" "TCGA-CM-6167" "TCGA-A6-5666" "TCGA-CK-5914" "TCGA-DM-A0X9"
## [176] "TCGA-AZ-4315" "TCGA-F4-6460" "TCGA-DM-A1DB" "TCGA-D5-5538" "TCGA-AD-6889"
## [181] "TCGA-DM-A1D7" "TCGA-AY-A69D" "TCGA-CA-5255" "TCGA-A6-3810" "TCGA-CK-4952"
## [186] "TCGA-DM-A28K" "TCGA-G4-6322" "TCGA-G4-6293" "TCGA-D5-6924" "TCGA-DM-A28C"
## [191] "TCGA-AY-6196" "TCGA-A6-5664" "TCGA-CM-6680" "TCGA-AZ-4615" "TCGA-AZ-6598"
## [196] "TCGA-AD-6965" "TCGA-G4-6625" "TCGA-DC-4745" "TCGA-EI-6917" "TCGA-EI-6506"
## [201] "TCGA-F5-6571" "TCGA-EI-6513" "TCGA-AH-6643" "TCGA-AF-A56K" "TCGA-DT-5265"
## [206] "TCGA-F5-6864" "TCGA-AG-A02N" "TCGA-EI-6881" "TCGA-EI-6511" "TCGA-F5-6465"
## [211] "TCGA-AG-4022" "TCGA-DC-6155" "TCGA-AF-2690" "TCGA-G5-6235" "TCGA-DY-A1DD"
## [216] "TCGA-DC-5869" "TCGA-AG-A01W" "TCGA-EI-6884" "TCGA-DY-A0XA" "TCGA-AG-3592"
## [221] "TCGA-G5-6641" "TCGA-CL-5918" "TCGA-EI-6512" "TCGA-EI-6509" "TCGA-AF-3911"
## [226] "TCGA-CI-6622" "TCGA-AF-6655" "TCGA-F5-6863" "TCGA-F5-6861" "TCGA-AG-3725"
## [231] "TCGA-EI-6514" "TCGA-EF-5830" "TCGA-AG-A01Y" "TCGA-EI-6508" "TCGA-DC-6682"
## [236] "TCGA-CL-5917" "TCGA-F5-6811" "TCGA-DY-A1DC" "TCGA-AH-6903" "TCGA-AF-6136"
## [241] "TCGA-AG-A026" "TCGA-F5-6813" "TCGA-BM-6198" "TCGA-F5-6812" "TCGA-AF-A56L"
## [246] "TCGA-AG-A020" "TCGA-EI-6883" "TCGA-EF-5831" "TCGA-EI-6882" "TCGA-AG-3591"
## [251] "TCGA-F5-6464" "TCGA-AF-A56N" "TCGA-CI-6621" "TCGA-DY-A1H8" "TCGA-AG-A036"
## [256] "TCGA-F5-6814" "TCGA-DY-A1DF" "TCGA-EI-6507" "TCGA-DY-A1DE" "TCGA-DC-6683"
## [261] "TCGA-AF-2687"
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 3.6.1 (2019-07-05)
## os Ubuntu 19.10
## system x86_64, linux-gnu
## ui X11
## language en_US
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/New_York
## date 2020-02-07
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## ! package * version date lib source
## annotate 1.64.0 2019-10-29 [1] Bioconductor
## AnnotationDbi 1.48.0 2019-10-29 [1] Bioconductor
## aroma.light 3.16.0 2019-10-29 [1] Bioconductor
## askpass 1.1 2019-01-13 [2] CRAN (R 3.6.1)
## assertthat 0.2.1 2019-03-21 [2] CRAN (R 3.6.1)
## backports 1.1.5 2019-10-02 [2] CRAN (R 3.6.1)
## Biobase 2.46.0 2019-10-29 [1] Bioconductor
## BiocFileCache 1.10.0 2019-10-29 [1] Bioconductor
## BiocGenerics 0.32.0 2019-10-29 [1] Bioconductor
## BiocParallel 1.20.0 2019-10-30 [1] Bioconductor
## biomaRt 2.42.0 2019-10-29 [1] Bioconductor
## Biostrings 2.54.0 2019-10-29 [1] Bioconductor
## bit 1.1-15.1 2020-01-14 [2] CRAN (R 3.6.1)
## bit64 0.9-7 2017-05-08 [2] CRAN (R 3.6.1)
## bitops 1.0-6 2013-08-17 [2] CRAN (R 3.6.1)
## blob 1.2.1 2020-01-20 [2] CRAN (R 3.6.1)
## broom 0.5.4 2020-01-27 [2] CRAN (R 3.6.1)
## callr 3.4.1 2020-01-24 [2] CRAN (R 3.6.1)
## cli 2.0.1 2020-01-08 [2] CRAN (R 3.6.1)
## codetools 0.2-16 2018-12-24 [4] CRAN (R 3.6.0)
## colorspace 1.4-1 2019-03-18 [2] CRAN (R 3.6.1)
## crayon 1.3.4 2017-09-16 [2] CRAN (R 3.6.1)
## curl 4.3 2019-12-02 [2] CRAN (R 3.6.1)
## data.table 1.12.6 2019-10-18 [1] CRAN (R 3.6.1)
## DBI 1.1.0 2019-12-15 [2] CRAN (R 3.6.1)
## dbplyr 1.4.2 2019-06-17 [2] CRAN (R 3.6.1)
## DelayedArray 0.12.0 2019-10-29 [1] Bioconductor
## desc 1.2.0 2018-05-01 [2] CRAN (R 3.6.1)
## DESeq 1.38.0 2019-10-29 [1] Bioconductor
## devtools 2.2.1 2019-09-24 [2] CRAN (R 3.6.1)
## digest 0.6.22 2019-10-21 [1] CRAN (R 3.6.1)
## doParallel 1.0.15 2019-08-02 [2] CRAN (R 3.6.1)
## downloader 0.4 2015-07-09 [2] CRAN (R 3.6.1)
## dplyr * 0.8.4 2020-01-31 [2] CRAN (R 3.6.1)
## EDASeq 2.20.0 2019-10-29 [1] Bioconductor
## edgeR 3.28.0 2019-10-29 [1] Bioconductor
## ellipsis 0.3.0 2019-09-20 [2] CRAN (R 3.6.1)
## evaluate 0.14 2019-05-28 [2] CRAN (R 3.6.1)
## fansi 0.4.1 2020-01-08 [2] CRAN (R 3.6.1)
## foreach 1.4.7 2019-07-27 [2] CRAN (R 3.6.1)
## fs 1.3.1 2019-05-06 [2] CRAN (R 3.6.1)
## genefilter 1.68.0 2019-10-29 [1] Bioconductor
## geneplotter 1.64.0 2019-10-29 [1] Bioconductor
## generics 0.0.2 2018-11-29 [2] CRAN (R 3.6.1)
## GenomeInfoDb 1.22.0 2019-10-29 [1] Bioconductor
## GenomeInfoDbData 1.2.2 2019-11-06 [1] Bioconductor
## GenomicAlignments 1.22.0 2019-10-29 [1] Bioconductor
## GenomicFeatures 1.38.0 2019-10-29 [1] Bioconductor
## GenomicRanges 1.38.0 2019-10-29 [1] Bioconductor
## ggplot2 3.2.1 2019-08-10 [2] CRAN (R 3.6.1)
## ggpubr 0.2.4 2019-11-14 [2] CRAN (R 3.6.1)
## ggrepel 0.8.1 2019-05-07 [2] CRAN (R 3.6.1)
## ggsignif 0.6.0 2019-08-08 [2] CRAN (R 3.6.1)
## ggthemes 4.2.0 2019-05-13 [2] CRAN (R 3.6.1)
## glue 1.3.1 2019-03-12 [2] CRAN (R 3.6.1)
## gridExtra 2.3 2017-09-09 [2] CRAN (R 3.6.1)
## gtable 0.3.0 2019-03-25 [1] CRAN (R 3.6.1)
## hms 0.5.3 2020-01-08 [2] CRAN (R 3.6.1)
## htmltools 0.4.0 2019-10-04 [2] CRAN (R 3.6.1)
## httr 1.4.1 2019-08-05 [2] CRAN (R 3.6.1)
## hwriter 1.3.2 2014-09-10 [2] CRAN (R 3.6.1)
## IRanges 2.20.0 2019-10-29 [1] Bioconductor
## iterators 1.0.12 2019-07-26 [2] CRAN (R 3.6.1)
## jpeg 0.1-8.1 2019-10-24 [1] CRAN (R 3.6.1)
## jsonlite 1.6.1 2020-02-02 [2] CRAN (R 3.6.1)
## km.ci 0.5-2 2009-08-30 [2] CRAN (R 3.6.1)
## KMsurv 0.1-5 2012-12-03 [2] CRAN (R 3.6.1)
## knitr 1.28 2020-02-06 [2] CRAN (R 3.6.1)
## lattice 0.20-38 2018-11-04 [4] CRAN (R 3.6.0)
## latticeExtra 0.6-29 2019-12-19 [2] CRAN (R 3.6.1)
## lazyeval 0.2.2 2019-03-15 [2] CRAN (R 3.6.1)
## lifecycle 0.1.0 2019-08-01 [2] CRAN (R 3.6.1)
## limma 3.42.0 2019-10-29 [1] Bioconductor
## locfit 1.5-9.1 2013-04-20 [2] CRAN (R 3.6.1)
## magrittr 1.5 2014-11-22 [2] CRAN (R 3.6.1)
## Matrix 1.2-18 2019-11-27 [2] CRAN (R 3.6.1)
## matrixStats 0.55.0 2019-09-07 [2] CRAN (R 3.6.1)
## memoise 1.1.0 2017-04-21 [2] CRAN (R 3.6.1)
## mgcv 1.8-31 2019-11-09 [4] CRAN (R 3.6.1)
## munsell 0.5.0 2018-06-12 [2] CRAN (R 3.6.1)
## nlme 3.1-144 2020-02-06 [4] CRAN (R 3.6.1)
## openssl 1.4.1 2019-07-18 [2] CRAN (R 3.6.1)
## parsetools 0.1.1 2019-07-11 [2] CRAN (R 3.6.1)
## pillar 1.4.3 2019-12-20 [2] CRAN (R 3.6.1)
## pkgbuild 1.0.6 2019-10-09 [2] CRAN (R 3.6.1)
## pkgcond 0.1.0 2018-12-03 [2] CRAN (R 3.6.1)
## pkgconfig 2.0.3 2019-09-22 [2] CRAN (R 3.6.1)
## pkgload 1.0.2 2018-10-29 [2] CRAN (R 3.6.1)
## plyr 1.8.5 2019-12-10 [2] CRAN (R 3.6.1)
## png 0.1-7 2013-12-03 [2] CRAN (R 3.6.1)
## postlogic 0.1.0.1 2019-12-18 [2] CRAN (R 3.6.1)
## prettyunits 1.1.1 2020-01-24 [2] CRAN (R 3.6.1)
## processx 3.4.1 2019-07-18 [2] CRAN (R 3.6.1)
## progress 1.2.2 2019-05-16 [2] CRAN (R 3.6.1)
## ps 1.3.0 2018-12-21 [2] CRAN (R 3.6.1)
## purrr 0.3.3 2019-10-18 [1] CRAN (R 3.6.1)
## purrrogress 0.1.1 2019-07-22 [2] CRAN (R 3.6.1)
## R.methodsS3 1.7.1 2016-02-16 [2] CRAN (R 3.6.1)
## R.oo 1.23.0 2019-11-03 [1] CRAN (R 3.6.1)
## R.utils 2.9.2 2019-12-08 [2] CRAN (R 3.6.1)
## R6 2.4.1 2019-11-12 [2] CRAN (R 3.6.1)
## rappdirs 0.3.1 2016-03-28 [2] CRAN (R 3.6.1)
## RColorBrewer 1.1-2 2014-12-07 [1] CRAN (R 3.6.1)
## Rcpp 1.0.3 2019-11-08 [2] CRAN (R 3.6.1)
## RCurl 1.98-1.1 2020-01-19 [2] CRAN (R 3.6.1)
## readr 1.3.1 2018-12-21 [2] CRAN (R 3.6.1)
## remotes 2.1.0 2019-06-24 [2] CRAN (R 3.6.1)
## rlang 0.4.4 2020-01-28 [2] CRAN (R 3.6.1)
## rmarkdown 1.16 2019-10-01 [1] CRAN (R 3.6.1)
## rprojroot 1.3-2 2018-01-03 [2] CRAN (R 3.6.1)
## Rsamtools 2.2.0 2019-10-29 [1] Bioconductor
## RSQLite 2.2.0 2020-01-07 [2] CRAN (R 3.6.1)
## rtracklayer 1.46.0 2019-10-29 [1] Bioconductor
## rvest 0.3.5 2019-11-08 [2] CRAN (R 3.6.1)
## S4Vectors 0.24.0 2019-10-29 [1] Bioconductor
## scales 1.1.0 2019-11-18 [2] CRAN (R 3.6.1)
## selectr 0.4-2 2019-11-20 [2] CRAN (R 3.6.1)
## sessioninfo 1.1.1 2018-11-05 [2] CRAN (R 3.6.1)
## ShortRead 1.44.0 2019-10-29 [1] Bioconductor
## stringi 1.4.5 2020-01-11 [2] CRAN (R 3.6.1)
## stringr 1.4.0 2019-02-10 [2] CRAN (R 3.6.1)
## SummarizedExperiment 1.16.0 2019-10-29 [1] Bioconductor
## survival 3.1-8 2019-12-03 [4] CRAN (R 3.6.1)
## survminer 0.4.6 2019-09-03 [2] CRAN (R 3.6.1)
## survMisc 0.5.5 2018-07-05 [2] CRAN (R 3.6.1)
## sva 3.34.0 2019-10-29 [1] Bioconductor
## R TCGAbiolinks * 2.15.3 <NA> [2] <NA>
## testextra 0.1.0.1 2019-12-18 [2] CRAN (R 3.6.1)
## testthat 2.3.1 2019-12-01 [2] CRAN (R 3.6.1)
## tibble 2.1.3 2019-06-06 [2] CRAN (R 3.6.1)
## tidyr 1.0.2 2020-01-24 [2] CRAN (R 3.6.1)
## tidyselect 1.0.0 2020-01-27 [2] CRAN (R 3.6.1)
## usethis 1.5.1 2019-07-04 [2] CRAN (R 3.6.1)
## vctrs 0.2.2 2020-01-24 [2] CRAN (R 3.6.1)
## withr 2.1.2 2018-03-15 [2] CRAN (R 3.6.1)
## xfun 0.10 2019-10-01 [1] CRAN (R 3.6.1)
## XML 3.99-0.3 2020-01-20 [2] CRAN (R 3.6.1)
## xml2 1.2.2 2019-08-09 [2] CRAN (R 3.6.1)
## xtable 1.8-4 2019-04-21 [2] CRAN (R 3.6.1)
## XVector 0.26.0 2019-10-29 [1] Bioconductor
## yaml 2.2.1 2020-02-01 [2] CRAN (R 3.6.1)
## zlibbioc 1.32.0 2019-10-29 [1] Bioconductor
## zoo 1.8-7 2020-01-10 [2] CRAN (R 3.6.1)
##
## [1] /home/tiagochst/R/x86_64-pc-linux-gnu-library/3.6
## [2] /usr/local/lib/R/site-library
## [3] /usr/lib/R/site-library
## [4] /usr/lib/R/library
##
## R ── Package was removed from disk.