# Query GDC for TCGA-BRCA gene expression quantification produced by the
# "HTSeq - FPKM-UQ" workflow, restricted to "Solid Tissue Normal" samples.
query <- GDCquery(
  project = "TCGA-BRCA",
  data.category = "Transcriptome Profiling",
  data.type = "Gene Expression Quantification",
  workflow.type = "HTSeq - FPKM-UQ",
  sample.type = "Solid Tissue Normal"
)

# Download the files matched by the query, then load them, asking for a
# SummarizedExperiment-style object (summarizedExperiment = TRUE).
GDCdownload(query)
rna <- GDCprepare(query, summarizedExperiment = TRUE)

# Extract the assay matrix; at this point its rows are ENSEMBL IDs.
rna.matrix <- assay(rna)

# Relabel the rows with gene symbols taken from the row annotation.
rownames(rna.matrix) <- values(rna)$external_gene_name

# Print the dimensions of the expression matrix.
dim(rna.matrix)
## [1] 56499 113
## TCGA-BH-A1F6-11B-94R-A13Q-07 TCGA-E9-A1NF-11A-73R-A14D-07
## TSPAN6 447440.54 320888.65
## TNMD 339662.82 644869.44
## DPM1 637945.32 410204.70
## SCYL3 79491.92 60651.01
## TCGA-E9-A1ND-11A-43R-A144-07 TCGA-E2-A1LH-11A-22R-A14D-07
## TSPAN6 266118.02 478460.45
## TNMD 271122.94 32914.84
## DPM1 512881.21 520469.49
## SCYL3 53184.87 116536.35
# Query the GDC legacy archive (legacy = TRUE) for TCGA-BRCA DNA methylation
# data from the "Illumina Human Methylation 450" platform, restricted to
# "Solid Tissue Normal" samples.
query_meth <- GDCquery(
  project = "TCGA-BRCA",
  legacy = TRUE,
  data.category = "DNA methylation",
  platform = "Illumina Human Methylation 450",
  sample.type = "Solid Tissue Normal"
)

# Download the matched files with files.per.chunk = 10, then load them,
# asking for a SummarizedExperiment-style object.
GDCdownload(query_meth, files.per.chunk = 10)
dna.met <- GDCprepare(query_meth, summarizedExperiment = TRUE)

# Extract the assay matrix from the prepared methylation object.
dna.met.matrix <- assay(dna.met)
## TCGA-BH-A209-11A-42D-A161-05 TCGA-BH-A0E0-11A-13D-A10Q-05
## cg00000029 0.6419409 0.1917094
## cg00000108 NA NA
## cg00000109 NA NA
## cg00000165 0.4406123 0.2278156
## TCGA-BH-A1FD-11B-21D-A13T-05 TCGA-BH-A0C3-11A-23D-A12R-05
## cg00000029 0.09919282 0.1299600
## cg00000108 NA NA
## cg00000109 NA NA
## cg00000165 0.15720792 0.2342865
## [1] 485577 97
## GRanges object with 485577 ranges and 1 metadata column:
## seqnames ranges strand | gene
## <Rle> <IRanges> <Rle> | <character>
## cg13869341 chr1 15865-15866 - | WASH7P
## cg14008030 chr1 18827-18828 - | MIR6859-3;WASH7P
## cg12045430 chr1 29407-29408 - | MIR1302-9;RP11-34P13.3;WASH7P
## cg20826792 chr1 29425-29426 - | MIR1302-9;RP11-34P13.3;WASH7P
## cg00381604 chr1 29435-29436 - | MIR1302-9;RP11-34P13.3;WASH7P
## ... ... ... ... . ...
## cg24238852 * 0 * | <NA>
## cg15254640 * 0 * | <NA>
## cg24336839 * 0 * | <NA>
## cg11478607 * 0 * | <NA>
## cg08417382 * 0 * | <NA>
## -------
## seqinfo: 26 sequences from an unspecified genome; no seqlengths
# Build a long probe -> gene table: one row per (probe, gene) pair.
# This should take a while...
# NOTE(review): `probes.info` is not defined in this section; the printed
# output above suggests a GRanges with a "gene" metadata column whose
# entries may hold several ";"-separated symbols -- confirm upstream.
probe.gene <- probes.info[, "gene"] %>%
  as.data.frame() %>%
  as_tibble(rownames = "probe") %>%
  dplyr::select("probe", "gene") %>%
  na.omit()

# Keep single-gene probes as they are; for probes annotated with several
# genes, split the annotation on ";" and expand it to one row per gene.
# Single-gene rows come first, followed by the expanded multi-gene rows.
probe.gene <- bind_rows(
  dplyr::filter(probe.gene, !grepl(";", gene)),
  dplyr::filter(probe.gene, grepl(";", gene)) %>%
    dplyr::mutate(gene = strsplit(gene, ";")) %>%
    unnest(gene)
)

# Print the dimensions of the expanded table.
dim(probe.gene)
## [1] 547716 2
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 3.6.1 (2019-07-05)
## os Ubuntu 19.10
## system x86_64, linux-gnu
## ui X11
## language en_US
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/New_York
## date 2020-02-03
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## ! package * version date lib source
## annotate 1.64.0 2019-10-29 [1] Bioconductor
## AnnotationDbi 1.48.0 2019-10-29 [1] Bioconductor
## AnnotationHub * 2.18.0 2019-10-29 [1] Bioconductor
## aroma.light 3.16.0 2019-10-29 [1] Bioconductor
## askpass 1.1 2019-01-13 [2] CRAN (R 3.6.1)
## assertthat 0.2.1 2019-03-21 [2] CRAN (R 3.6.1)
## backports 1.1.5 2019-10-02 [2] CRAN (R 3.6.1)
## base64enc 0.1-3 2015-07-28 [2] CRAN (R 3.6.1)
## Biobase * 2.46.0 2019-10-29 [1] Bioconductor
## BiocFileCache * 1.10.0 2019-10-29 [1] Bioconductor
## BiocGenerics * 0.32.0 2019-10-29 [1] Bioconductor
## BiocManager 1.30.10 2019-11-16 [2] CRAN (R 3.6.1)
## BiocParallel * 1.20.0 2019-10-30 [1] Bioconductor
## BiocVersion 3.10.1 2019-06-06 [1] Bioconductor
## biomaRt 2.42.0 2019-10-29 [1] Bioconductor
## Biostrings 2.54.0 2019-10-29 [1] Bioconductor
## bit 1.1-15.1 2020-01-14 [2] CRAN (R 3.6.1)
## bit64 0.9-7 2017-05-08 [2] CRAN (R 3.6.1)
## bitops 1.0-6 2013-08-17 [2] CRAN (R 3.6.1)
## blob 1.2.1 2020-01-20 [2] CRAN (R 3.6.1)
## broom 0.5.3 2019-12-14 [2] CRAN (R 3.6.1)
## callr 3.4.0 2019-12-09 [2] CRAN (R 3.6.1)
## cli 2.0.1 2020-01-08 [2] CRAN (R 3.6.1)
## codetools 0.2-16 2018-12-24 [4] CRAN (R 3.6.0)
## colorspace 1.4-1 2019-03-18 [2] CRAN (R 3.6.1)
## crayon 1.3.4 2017-09-16 [2] CRAN (R 3.6.1)
## curl 4.3 2019-12-02 [2] CRAN (R 3.6.1)
## data.table 1.12.6 2019-10-18 [1] CRAN (R 3.6.1)
## DBI 1.1.0 2019-12-15 [2] CRAN (R 3.6.1)
## dbplyr * 1.4.2 2019-06-17 [2] CRAN (R 3.6.1)
## DelayedArray * 0.12.0 2019-10-29 [1] Bioconductor
## desc 1.2.0 2018-05-01 [2] CRAN (R 3.6.1)
## DESeq 1.38.0 2019-10-29 [1] Bioconductor
## devtools 2.2.1 2019-09-24 [2] CRAN (R 3.6.1)
## digest 0.6.22 2019-10-21 [1] CRAN (R 3.6.1)
## DNAcopy 1.60.0 2019-10-29 [1] Bioconductor
## doParallel 1.0.15 2019-08-02 [2] CRAN (R 3.6.1)
## downloader 0.4 2015-07-09 [2] CRAN (R 3.6.1)
## dplyr * 0.8.4 2020-01-31 [2] CRAN (R 3.6.1)
## EDASeq 2.20.0 2019-10-29 [1] Bioconductor
## edgeR 3.28.0 2019-10-29 [1] Bioconductor
## ellipsis 0.3.0 2019-09-20 [2] CRAN (R 3.6.1)
## evaluate 0.14 2019-05-28 [2] CRAN (R 3.6.1)
## ExperimentHub * 1.12.0 2019-10-29 [1] Bioconductor
## fansi 0.4.1 2020-01-08 [2] CRAN (R 3.6.1)
## fastmap 1.0.1 2019-10-08 [2] CRAN (R 3.6.1)
## foreach 1.4.7 2019-07-27 [2] CRAN (R 3.6.1)
## fs 1.3.1 2019-05-06 [2] CRAN (R 3.6.1)
## genefilter 1.68.0 2019-10-29 [1] Bioconductor
## geneplotter 1.64.0 2019-10-29 [1] Bioconductor
## generics 0.0.2 2018-11-29 [2] CRAN (R 3.6.1)
## GenomeInfoDb * 1.22.0 2019-10-29 [1] Bioconductor
## GenomeInfoDbData 1.2.2 2019-11-06 [1] Bioconductor
## GenomicAlignments 1.22.0 2019-10-29 [1] Bioconductor
## GenomicFeatures 1.38.0 2019-10-29 [1] Bioconductor
## GenomicRanges * 1.38.0 2019-10-29 [1] Bioconductor
## ggplot2 3.2.1 2019-08-10 [2] CRAN (R 3.6.1)
## ggpubr 0.2.4 2019-11-14 [2] CRAN (R 3.6.1)
## ggrepel 0.8.1 2019-05-07 [2] CRAN (R 3.6.1)
## ggsignif 0.6.0 2019-08-08 [2] CRAN (R 3.6.1)
## ggthemes 4.2.0 2019-05-13 [2] CRAN (R 3.6.1)
## glue 1.3.1 2019-03-12 [2] CRAN (R 3.6.1)
## gridExtra 2.3 2017-09-09 [2] CRAN (R 3.6.1)
## gtable 0.3.0 2019-03-25 [1] CRAN (R 3.6.1)
## hms 0.5.3 2020-01-08 [2] CRAN (R 3.6.1)
## htmltools 0.4.0 2019-10-04 [2] CRAN (R 3.6.1)
## httpuv 1.5.2 2019-09-11 [2] CRAN (R 3.6.1)
## httr 1.4.1 2019-08-05 [2] CRAN (R 3.6.1)
## hwriter 1.3.2 2014-09-10 [2] CRAN (R 3.6.1)
## interactiveDisplayBase 1.24.0 2019-10-29 [1] Bioconductor
## IRanges * 2.20.0 2019-10-29 [1] Bioconductor
## iterators 1.0.12 2019-07-26 [2] CRAN (R 3.6.1)
## jpeg 0.1-8.1 2019-10-24 [1] CRAN (R 3.6.1)
## jsonlite 1.6.1 2020-02-02 [2] CRAN (R 3.6.1)
## km.ci 0.5-2 2009-08-30 [2] CRAN (R 3.6.1)
## KMsurv 0.1-5 2012-12-03 [2] CRAN (R 3.6.1)
## knitr 1.26 2019-11-12 [2] CRAN (R 3.6.1)
## later 1.0.0 2019-10-04 [2] CRAN (R 3.6.1)
## lattice 0.20-38 2018-11-04 [4] CRAN (R 3.6.0)
## latticeExtra 0.6-29 2019-12-19 [2] CRAN (R 3.6.1)
## lazyeval 0.2.2 2019-03-15 [2] CRAN (R 3.6.1)
## lifecycle 0.1.0 2019-08-01 [2] CRAN (R 3.6.1)
## limma 3.42.0 2019-10-29 [1] Bioconductor
## locfit 1.5-9.1 2013-04-20 [2] CRAN (R 3.6.1)
## magrittr 1.5 2014-11-22 [2] CRAN (R 3.6.1)
## Matrix 1.2-18 2019-11-27 [2] CRAN (R 3.6.1)
## matrixStats * 0.55.0 2019-09-07 [2] CRAN (R 3.6.1)
## memoise 1.1.0 2017-04-21 [2] CRAN (R 3.6.1)
## mgcv 1.8-31 2019-11-09 [4] CRAN (R 3.6.1)
## mime 0.8 2019-12-19 [2] CRAN (R 3.6.1)
## munsell 0.5.0 2018-06-12 [2] CRAN (R 3.6.1)
## nlme 3.1-143 2019-12-10 [4] CRAN (R 3.6.1)
## openssl 1.4.1 2019-07-18 [2] CRAN (R 3.6.1)
## parsetools 0.1.1 2019-07-11 [2] CRAN (R 3.6.1)
## pillar 1.4.3 2019-12-20 [2] CRAN (R 3.6.1)
## pkgbuild 1.0.6 2019-10-09 [2] CRAN (R 3.6.1)
## pkgcond 0.1.0 2018-12-03 [2] CRAN (R 3.6.1)
## pkgconfig 2.0.3 2019-09-22 [2] CRAN (R 3.6.1)
## pkgload 1.0.2 2018-10-29 [2] CRAN (R 3.6.1)
## plyr 1.8.5 2019-12-10 [2] CRAN (R 3.6.1)
## png 0.1-7 2013-12-03 [2] CRAN (R 3.6.1)
## postlogic 0.1.0.1 2019-12-18 [2] CRAN (R 3.6.1)
## preprocessCore 1.48.0 2019-10-29 [1] Bioconductor
## prettyunits 1.1.1 2020-01-24 [2] CRAN (R 3.6.1)
## processx 3.4.1 2019-07-18 [2] CRAN (R 3.6.1)
## progress 1.2.2 2019-05-16 [2] CRAN (R 3.6.1)
## promises 1.1.0 2019-10-04 [2] CRAN (R 3.6.1)
## ps 1.3.0 2018-12-21 [2] CRAN (R 3.6.1)
## purrr 0.3.3 2019-10-18 [1] CRAN (R 3.6.1)
## purrrogress 0.1.1 2019-07-22 [2] CRAN (R 3.6.1)
## R.methodsS3 1.7.1 2016-02-16 [2] CRAN (R 3.6.1)
## R.oo 1.23.0 2019-11-03 [1] CRAN (R 3.6.1)
## R.utils 2.9.2 2019-12-08 [2] CRAN (R 3.6.1)
## R6 2.4.1 2019-11-12 [2] CRAN (R 3.6.1)
## randomForest 4.6-14 2018-03-25 [2] CRAN (R 3.6.1)
## rappdirs 0.3.1 2016-03-28 [2] CRAN (R 3.6.1)
## RColorBrewer 1.1-2 2014-12-07 [1] CRAN (R 3.6.1)
## Rcpp 1.0.3 2019-11-08 [2] CRAN (R 3.6.1)
## RCurl 1.98-1.1 2020-01-19 [2] CRAN (R 3.6.1)
## readr 1.3.1 2018-12-21 [2] CRAN (R 3.6.1)
## remotes 2.1.0 2019-06-24 [2] CRAN (R 3.6.1)
## rlang 0.4.4 2020-01-28 [2] CRAN (R 3.6.1)
## rmarkdown 1.16 2019-10-01 [1] CRAN (R 3.6.1)
## rprojroot 1.3-2 2018-01-03 [2] CRAN (R 3.6.1)
## Rsamtools 2.2.0 2019-10-29 [1] Bioconductor
## RSQLite 2.2.0 2020-01-07 [2] CRAN (R 3.6.1)
## rtracklayer 1.46.0 2019-10-29 [1] Bioconductor
## rvest 0.3.5 2019-11-08 [2] CRAN (R 3.6.1)
## S4Vectors * 0.24.0 2019-10-29 [1] Bioconductor
## scales 1.1.0 2019-11-18 [2] CRAN (R 3.6.1)
## selectr 0.4-2 2019-11-20 [2] CRAN (R 3.6.1)
## sesame * 1.4.0 2019-10-29 [1] Bioconductor
## sesameData * 1.4.0 2019-11-05 [1] Bioconductor
## sessioninfo 1.1.1 2018-11-05 [2] CRAN (R 3.6.1)
## shiny 1.4.0 2019-10-10 [2] CRAN (R 3.6.1)
## ShortRead 1.44.0 2019-10-29 [1] Bioconductor
## stringi 1.4.5 2020-01-11 [2] CRAN (R 3.6.1)
## stringr 1.4.0 2019-02-10 [2] CRAN (R 3.6.1)
## SummarizedExperiment * 1.16.0 2019-10-29 [1] Bioconductor
## survival 3.1-8 2019-12-03 [4] CRAN (R 3.6.1)
## survminer 0.4.6 2019-09-03 [2] CRAN (R 3.6.1)
## survMisc 0.5.5 2018-07-05 [2] CRAN (R 3.6.1)
## sva 3.34.0 2019-10-29 [1] Bioconductor
## R TCGAbiolinks * 2.15.3 <NA> [2] <NA>
## testextra 0.1.0.1 2019-12-18 [2] CRAN (R 3.6.1)
## testthat 2.3.1 2019-12-01 [2] CRAN (R 3.6.1)
## tibble 2.1.3 2019-06-06 [2] CRAN (R 3.6.1)
## tidyr * 1.0.2 2020-01-24 [2] CRAN (R 3.6.1)
## tidyselect 1.0.0 2020-01-27 [2] CRAN (R 3.6.1)
## usethis 1.5.1 2019-07-04 [2] CRAN (R 3.6.1)
## vctrs 0.2.2 2020-01-24 [2] CRAN (R 3.6.1)
## wheatmap 0.1.0 2018-03-15 [2] CRAN (R 3.6.1)
## withr 2.1.2 2018-03-15 [2] CRAN (R 3.6.1)
## xfun 0.10 2019-10-01 [1] CRAN (R 3.6.1)
## XML 3.99-0.3 2020-01-20 [2] CRAN (R 3.6.1)
## xml2 1.2.2 2019-08-09 [2] CRAN (R 3.6.1)
## xtable 1.8-4 2019-04-21 [2] CRAN (R 3.6.1)
## XVector 0.26.0 2019-10-29 [1] Bioconductor
## yaml 2.2.0 2018-07-25 [2] CRAN (R 3.6.1)
## zlibbioc 1.32.0 2019-10-29 [1] Bioconductor
## zoo 1.8-7 2020-01-10 [2] CRAN (R 3.6.1)
##
## [1] /home/tiagochst/R/x86_64-pc-linux-gnu-library/3.6
## [2] /usr/local/lib/R/site-library
## [3] /usr/lib/R/site-library
## [4] /usr/lib/R/library
##
## R ── Package was removed from disk.