# Build a GDC query for the TCGA-GBM project, restricted to DNA methylation
# data measured on the Illumina Human Methylation 450 platform.
# NOTE(review): `legacy = TRUE` presumably selects the GDC legacy archive;
# confirm the installed TCGAbiolinks version still accepts this argument.
query.DNAmethy.gbm <- GDCquery(
  project = "TCGA-GBM",
  data.category = "DNA methylation",
  platform = "Illumina Human Methylation 450",
  legacy = TRUE
)

# Fetch the files matched by the query, requesting 10 files per chunk.
GDCdownload(query = query.DNAmethy.gbm, files.per.chunk = 10)

# Load the downloaded files into a single R object; its printed summary
# follows as `##` output lines.
metGBM <- GDCprepare(query = query.DNAmethy.gbm)
## class: RangedSummarizedExperiment
## dim: 485577 155
## metadata(1): data_release
## assays(1): ''
## rownames(485577): cg00000029 cg00000108 ... rs966367 rs9839873
## rowData names(2): probeID Gene_Symbol
## colnames(155): TCGA-76-6661-01B-11D-1844-05
## TCGA-74-6573-01A-12D-1844-05 ... TCGA-76-4934-01A-01D-1481-05
## TCGA-06-0152-02A-01D-2004-05
## colData names(109): barcode patient ...
## paper_Telomere.length.estimate.in.blood.normal..Kb.
## paper_Telomere.length.estimate.in.tumor..Kb.
# Count, for each probe (row of the assay matrix), how many samples are NA.
nas.per.cpg <- rowSums(is.na(assay(metGBM)))
head(nas.per.cpg)
## cg00000029 cg00000108 cg00000109 cg00000165 cg00000236 cg00000289
## 0 155 155 0 0 0

# Flag the probes whose fraction of NA samples is above 70% and keep only
# the remaining probes (rows) of the object.
number.samples <- ncol(metGBM)
is.cpg.more.than.seventyPercent.is.na <- nas.per.cpg / number.samples > 0.70
metGBM.na.filter <- metGBM[!is.cpg.more.than.seventyPercent.is.na, ]
## [1] 485577 155
## [1] 396065 155
## [1] 89512
## .
## FALSE TRUE
## 61332740 57335
## [1] "matrix"
## .
## FALSE
## 61390075
## [1] 0.0000000 0.9962979
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 3.6.1 (2019-07-05)
## os Ubuntu 19.10
## system x86_64, linux-gnu
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/New_York
## date 2020-08-04
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date lib source
## abind 1.4-5 2016-07-21 [2] CRAN (R 3.6.1)
## annotate 1.64.0 2019-10-29 [2] Bioconductor
## AnnotationDbi 1.48.0 2019-10-29 [2] Bioconductor
## aroma.light 3.16.0 2019-10-29 [2] Bioconductor
## askpass 1.1 2019-01-13 [2] CRAN (R 3.6.1)
## assertthat 0.2.1 2019-03-21 [2] CRAN (R 3.6.1)
## backports 1.1.8 2020-06-17 [2] CRAN (R 3.6.1)
## Biobase * 2.46.0 2019-10-29 [2] Bioconductor
## BiocFileCache 1.10.2 2019-11-08 [2] Bioconductor
## BiocGenerics * 0.32.0 2019-10-29 [2] Bioconductor
## BiocParallel * 1.20.1 2019-12-21 [2] Bioconductor
## biomaRt 2.42.1 2020-03-26 [2] Bioconductor
## Biostrings 2.54.0 2019-10-29 [2] Bioconductor
## bit 4.0.4 2020-08-04 [2] CRAN (R 3.6.1)
## bit64 0.9-7.1 2020-07-15 [1] CRAN (R 3.6.1)
## bitops 1.0-6 2013-08-17 [2] CRAN (R 3.6.1)
## blob 1.2.1 2020-01-20 [2] CRAN (R 3.6.1)
## broom 0.7.0 2020-07-09 [2] CRAN (R 3.6.1)
## callr 3.4.3 2020-03-28 [2] CRAN (R 3.6.1)
## car 3.0-8 2020-05-21 [2] CRAN (R 3.6.1)
## carData 3.0-4 2020-05-22 [2] CRAN (R 3.6.1)
## cellranger 1.1.0 2016-07-27 [2] CRAN (R 3.6.1)
## cli 2.0.2 2020-02-28 [2] CRAN (R 3.6.1)
## codetools 0.2-16 2018-12-24 [4] CRAN (R 3.5.2)
## colorspace 1.4-1 2019-03-18 [2] CRAN (R 3.6.1)
## crayon 1.3.4 2017-09-16 [2] CRAN (R 3.6.1)
## curl 4.3 2019-12-02 [2] CRAN (R 3.6.1)
## data.table 1.13.0 2020-07-24 [2] CRAN (R 3.6.1)
## DBI 1.1.0 2019-12-15 [2] CRAN (R 3.6.1)
## dbplyr 1.4.4 2020-05-27 [2] CRAN (R 3.6.1)
## DelayedArray * 0.12.3 2020-04-09 [2] Bioconductor
## desc 1.2.0 2018-05-01 [2] CRAN (R 3.6.1)
## DESeq 1.38.0 2019-10-29 [2] Bioconductor
## devtools 2.3.1 2020-07-21 [2] CRAN (R 3.6.1)
## digest 0.6.25 2020-02-23 [2] CRAN (R 3.6.1)
## doParallel 1.0.15 2019-08-02 [2] CRAN (R 3.6.1)
## downloader 0.4 2015-07-09 [2] CRAN (R 3.6.1)
## dplyr * 1.0.1 2020-07-31 [2] CRAN (R 3.6.1)
## EDASeq 2.20.0 2019-10-29 [2] Bioconductor
## edgeR 3.28.1 2020-02-26 [2] Bioconductor
## ellipsis 0.3.1 2020-05-15 [2] CRAN (R 3.6.1)
## evaluate 0.14 2019-05-28 [2] CRAN (R 3.6.1)
## fansi 0.4.1 2020-01-08 [2] CRAN (R 3.6.1)
## forcats 0.5.0 2020-03-01 [2] CRAN (R 3.6.1)
## foreach 1.5.0 2020-03-30 [2] CRAN (R 3.6.1)
## foreign 0.8-72 2019-08-02 [4] CRAN (R 3.6.1)
## fs 1.5.0 2020-07-31 [2] CRAN (R 3.6.1)
## genefilter 1.68.0 2019-10-29 [2] Bioconductor
## geneplotter 1.64.0 2019-10-29 [2] Bioconductor
## generics 0.0.2 2018-11-29 [2] CRAN (R 3.6.1)
## GenomeInfoDb * 1.22.1 2020-03-27 [2] Bioconductor
## GenomeInfoDbData 1.2.2 2019-12-02 [2] Bioconductor
## GenomicAlignments 1.22.1 2019-11-12 [2] Bioconductor
## GenomicFeatures 1.38.2 2020-02-15 [2] Bioconductor
## GenomicRanges * 1.38.0 2019-10-29 [2] Bioconductor
## ggplot2 3.3.2 2020-06-19 [2] CRAN (R 3.6.1)
## ggpubr 0.4.0 2020-06-27 [2] CRAN (R 3.6.1)
## ggrepel 0.8.2 2020-03-08 [2] CRAN (R 3.6.1)
## ggsignif 0.6.0 2019-08-08 [2] CRAN (R 3.6.1)
## ggthemes 4.2.0 2019-05-13 [2] CRAN (R 3.6.1)
## glue 1.4.1 2020-05-13 [2] CRAN (R 3.6.1)
## gridExtra 2.3 2017-09-09 [2] CRAN (R 3.6.1)
## gtable 0.3.0 2019-03-25 [2] CRAN (R 3.6.1)
## haven 2.3.1 2020-06-01 [2] CRAN (R 3.6.1)
## hms 0.5.3 2020-01-08 [2] CRAN (R 3.6.1)
## htmltools 0.5.0 2020-06-16 [2] CRAN (R 3.6.1)
## httr 1.4.2 2020-07-20 [2] CRAN (R 3.6.1)
## hwriter 1.3.2 2014-09-10 [2] CRAN (R 3.6.1)
## impute * 1.60.0 2019-10-29 [2] Bioconductor
## IRanges * 2.20.2 2020-01-13 [2] Bioconductor
## iterators 1.0.12 2019-07-26 [2] CRAN (R 3.6.1)
## jpeg 0.1-8.1 2019-10-24 [2] CRAN (R 3.6.1)
## jsonlite 1.7.0 2020-06-25 [2] CRAN (R 3.6.1)
## km.ci 0.5-2 2009-08-30 [2] CRAN (R 3.6.1)
## KMsurv 0.1-5 2012-12-03 [2] CRAN (R 3.6.1)
## knitr 1.29 2020-06-23 [2] CRAN (R 3.6.1)
## lattice 0.20-41 2020-04-02 [4] CRAN (R 3.6.1)
## latticeExtra 0.6-29 2019-12-19 [2] CRAN (R 3.6.1)
## lifecycle 0.2.0 2020-03-06 [2] CRAN (R 3.6.1)
## limma 3.42.2 2020-02-03 [2] Bioconductor
## locfit 1.5-9.4 2020-03-25 [2] CRAN (R 3.6.1)
## magrittr 1.5 2014-11-22 [2] CRAN (R 3.6.1)
## Matrix 1.2-18 2019-11-27 [2] CRAN (R 3.6.1)
## matrixStats * 0.56.0 2020-03-13 [2] CRAN (R 3.6.1)
## memoise 1.1.0 2017-04-21 [2] CRAN (R 3.6.1)
## mgcv 1.8-31 2019-11-09 [4] CRAN (R 3.6.1)
## munsell 0.5.0 2018-06-12 [2] CRAN (R 3.6.1)
## nlme 3.1-148 2020-05-24 [4] CRAN (R 3.6.1)
## openssl 1.4.2 2020-06-27 [2] CRAN (R 3.6.1)
## openxlsx 4.1.5 2020-05-06 [2] CRAN (R 3.6.1)
## parsetools 0.1.3 2020-04-08 [2] CRAN (R 3.6.1)
## pillar 1.4.6 2020-07-10 [1] CRAN (R 3.6.1)
## pkgbuild 1.1.0 2020-07-13 [1] CRAN (R 3.6.1)
## pkgcond 0.1.0 2018-12-03 [2] CRAN (R 3.6.1)
## pkgconfig 2.0.3 2019-09-22 [2] CRAN (R 3.6.1)
## pkgload 1.1.0 2020-05-29 [2] CRAN (R 3.6.1)
## plyr 1.8.6 2020-03-03 [2] CRAN (R 3.6.1)
## png 0.1-7 2013-12-03 [2] CRAN (R 3.6.1)
## postlogic 0.1.0.1 2019-12-18 [2] CRAN (R 3.6.1)
## prettyunits 1.1.1 2020-01-24 [2] CRAN (R 3.6.1)
## processx 3.4.3 2020-07-05 [2] CRAN (R 3.6.1)
## progress 1.2.2 2019-05-16 [2] CRAN (R 3.6.1)
## ps 1.3.3 2020-05-08 [2] CRAN (R 3.6.1)
## purrr 0.3.4 2020-04-17 [2] CRAN (R 3.6.1)
## purrrogress 0.1.1 2019-07-22 [2] CRAN (R 3.6.1)
## R.methodsS3 1.8.0 2020-02-14 [2] CRAN (R 3.6.1)
## R.oo 1.23.0 2019-11-03 [2] CRAN (R 3.6.1)
## R.utils 2.9.2 2019-12-08 [2] CRAN (R 3.6.1)
## R6 2.4.1 2019-11-12 [2] CRAN (R 3.6.1)
## rappdirs 0.3.1 2016-03-28 [2] CRAN (R 3.6.1)
## RColorBrewer 1.1-2 2014-12-07 [2] CRAN (R 3.6.1)
## Rcpp 1.0.5 2020-07-06 [2] CRAN (R 3.6.1)
## RCurl 1.98-1.2 2020-04-18 [2] CRAN (R 3.6.1)
## readr 1.3.1 2018-12-21 [2] CRAN (R 3.6.1)
## readxl 1.3.1 2019-03-13 [2] CRAN (R 3.6.1)
## remotes 2.2.0 2020-07-21 [2] CRAN (R 3.6.1)
## rio 0.5.16 2018-11-26 [2] CRAN (R 3.6.1)
## rlang 0.4.7 2020-07-09 [2] CRAN (R 3.6.1)
## rmarkdown 2.3 2020-06-18 [2] CRAN (R 3.6.1)
## rprojroot 1.3-2 2018-01-03 [2] CRAN (R 3.6.1)
## Rsamtools 2.2.3 2020-02-23 [2] Bioconductor
## RSQLite 2.2.0 2020-01-07 [2] CRAN (R 3.6.1)
## rstatix 0.6.0 2020-06-18 [2] CRAN (R 3.6.1)
## rtracklayer 1.46.0 2019-10-29 [2] Bioconductor
## rvest 0.3.6 2020-07-25 [2] CRAN (R 3.6.1)
## S4Vectors * 0.24.4 2020-04-09 [2] Bioconductor
## scales 1.1.1 2020-05-11 [2] CRAN (R 3.6.1)
## selectr 0.4-2 2019-11-20 [2] CRAN (R 3.6.1)
## sessioninfo 1.1.1 2018-11-05 [2] CRAN (R 3.6.1)
## ShortRead 1.44.3 2020-02-03 [2] Bioconductor
## stringi 1.4.6 2020-02-17 [2] CRAN (R 3.6.1)
## stringr 1.4.0 2019-02-10 [2] CRAN (R 3.6.1)
## SummarizedExperiment * 1.16.1 2019-12-19 [2] Bioconductor
## survival 3.2-3 2020-06-13 [4] CRAN (R 3.6.1)
## survminer 0.4.8 2020-07-25 [2] CRAN (R 3.6.1)
## survMisc 0.5.5 2018-07-05 [2] CRAN (R 3.6.1)
## sva 3.34.0 2019-10-29 [2] Bioconductor
## TCGAbiolinks * 2.14.1 2020-02-27 [2] Bioconductor
## testextra 0.1.0.1 2019-12-18 [2] CRAN (R 3.6.1)
## testthat 2.3.2 2020-03-02 [2] CRAN (R 3.6.1)
## tibble 3.0.3 2020-07-10 [1] CRAN (R 3.6.1)
## tidyr 1.1.1 2020-07-31 [2] CRAN (R 3.6.1)
## tidyselect 1.1.0 2020-05-11 [2] CRAN (R 3.6.1)
## usethis 1.6.1 2020-04-29 [2] CRAN (R 3.6.1)
## vctrs 0.3.2 2020-07-15 [1] CRAN (R 3.6.1)
## withr 2.2.0 2020-04-20 [2] CRAN (R 3.6.1)
## xfun 0.16 2020-07-24 [2] CRAN (R 3.6.1)
## XML 3.99-0.3 2020-01-20 [2] CRAN (R 3.6.1)
## xml2 1.3.2 2020-04-23 [2] CRAN (R 3.6.1)
## xtable 1.8-4 2019-04-21 [2] CRAN (R 3.6.1)
## XVector 0.26.0 2019-10-29 [2] Bioconductor
## yaml 2.2.1 2020-02-01 [2] CRAN (R 3.6.1)
## zip 2.0.4 2019-09-01 [2] CRAN (R 3.6.1)
## zlibbioc 1.32.0 2019-10-29 [2] Bioconductor
## zoo 1.8-8 2020-05-02 [2] CRAN (R 3.6.1)
##
## [1] /home/tiagochst/R/x86_64-pc-linux-gnu-library/3.6
## [2] /usr/local/lib/R/site-library
## [3] /usr/lib/R/site-library
## [4] /usr/lib/R/library