library(TCGAbiolinks)
library(stringr)
# Query GDC for the TARGET-AML "Gene Expression Quantification" files
# produced by the "HTSeq - Counts" workflow.
query <- GDCquery(
  project = "TARGET-AML",
  data.category = "Transcriptome Profiling",
  data.type = "Gene Expression Quantification",
  workflow.type = "HTSeq - Counts"
)
# Keep only the first row of the first results table
# (presumably to limit the download to a single file).
query$results[[1]] <- query$results[[1]][1, ]
GDCdownload(query)
# Prepare the downloaded data with GDCprepare()'s default arguments; the
# result is later queried with rownames().
data.se <- GDCprepare(query)

  |                                                                                                                                                                                           
  |                                                                                                                                                                                     |   0%
  |                                                                                                                                                                                           
  |=====================================================================================================================================================================================| 100%
# Prepare the same query a second time with summarizedExperiment = FALSE;
# the result is used below as a table that has an `X1` column.
data.df <- GDCprepare(
  query,
  summarizedExperiment = FALSE
)

  |                                                                                                                                                                                           
  |                                                                                                                                                                                     |   0%
  |                                                                                                                                                                                           
  |=====================================================================================================================================================================================| 100%
# Remove the Ensembl version suffix: ENSG00000238803.1 becomes ENSG00000238803
aux <- strsplit(data.df$X1, "\\.")
# The first piece of each split ID is everything before the first dot.
data.df$ensembl_gene_id <- vapply(aux, function(parts) parts[1], character(1))
# Count how many of the stripped IDs appear among the row names of data.se.
table(data.df$ensembl_gene_id %in% rownames(data.se))

FALSE  TRUE 
 3658 56830 
# Which IDs were not mapped?
# as_tibble() is called through the tibble namespace because no library()
# call visible in this script attaches tibble, so the bare name would not be
# found in a fresh session.
tibble::as_tibble(
  data.df$ensembl_gene_id[!(data.df$ensembl_gene_id %in% rownames(data.se))]
)
# Confirm they are not in the Ensembl database.
# NOTE(review): get.GRCh.bioMart() is reached via `:::`, i.e. it is not part
# of TCGAbiolinks' exported interface and may change between versions.
hg38 <- TCGAbiolinks:::get.GRCh.bioMart("hg38")
Downloading genome information (try:0) Using: Human genes (GRCh38.p12)
Loading from disk
# Cross-tabulate whether each stripped ID is present in the
# `ensembl_gene_id` column of the hg38 annotation table.
table(
  data.df$ensembl_gene_id %in% hg38$ensembl_gene_id
)

FALSE  TRUE 
 3658 56830 
LS0tCnRpdGxlOiAiUXVlc3Rpb246IFRDR0FiaW9saW5rcyBPdXRQdXRzICg1JSBMb3NpbmcgSW5mb3JtYXRpb24pIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpgYGB7ciwgbWVzc2FnZT1GQUxTRSx3YXJuaW5nPUZBTFNFfQpsaWJyYXJ5KFRDR0FiaW9saW5rcykKbGlicmFyeShzdHJpbmdyKQpxdWVyeSA8LSBHRENxdWVyeShwcm9qZWN0ID0gIlRBUkdFVC1BTUwiLAogICAgICAgICAgICAgICAgICBkYXRhLmNhdGVnb3J5ID0gIlRyYW5zY3JpcHRvbWUgUHJvZmlsaW5nIiwKICAgICAgICAgICAgICAgICAgZGF0YS50eXBlID0gIkdlbmUgRXhwcmVzc2lvbiBRdWFudGlmaWNhdGlvbiIsIAogICAgICAgICAgICAgICAgICB3b3JrZmxvdy50eXBlID0gIkhUU2VxIC0gQ291bnRzIikKcXVlcnkkcmVzdWx0c1tbMV1dIDwtIHF1ZXJ5JHJlc3VsdHNbWzFdXVsxLF0KR0RDZG93bmxvYWQocXVlcnkpCmRhdGEuc2UgPC0gR0RDcHJlcGFyZShxdWVyeSkKZGF0YS5kZiA8LSBHRENwcmVwYXJlKHF1ZXJ5LCBzdW1tYXJpemVkRXhwZXJpbWVudCA9IEZBTFNFKQoKIyBSZW1vdmUgZW5zZW5ibCB2ZXJzaW9uIEVOU0cwMDAwMDIzODgwMy4xIHdpbGwgYmUgRU5TRzAwMDAwMjM4ODAzCmF1eCA8LSBzdHJzcGxpdChkYXRhLmRmJFgxLCJcXC4iKQpkYXRhLmRmJGVuc2VtYmxfZ2VuZV9pZCA8LSBhcy5jaGFyYWN0ZXIodW5saXN0KGxhcHBseShhdXgsZnVuY3Rpb24oeCkgeFsxXSkpKQoKYGBgCgpgYGB7cn0KCnRhYmxlKGRhdGEuZGYkZW5zZW1ibF9nZW5lX2lkICVpbiUgcm93bmFtZXMoZGF0YS5zZSkpCgojIFdoaWNoIHdlcmUgbm90IG1hcHBlZCA/CmFzX3RpYmJsZShkYXRhLmRmJGVuc2VtYmxfZ2VuZV9pZFshZGF0YS5kZiRlbnNlbWJsX2dlbmVfaWQgJWluJSByb3duYW1lcyhkYXRhLnNlKV0pCgojIGNvbmZpcm0gdGhleSBhcmUgbm90IGluIEVOU0VNQkxFIERCCmhnMzggPC0gVENHQWJpb2xpbmtzOjo6Z2V0LkdSQ2guYmlvTWFydCgiaGczOCIpCnRhYmxlKGRhdGEuZGYkZW5zZW1ibF9nZW5lX2lkICVpbiUgaGczOCRlbnNlbWJsX2dlbmVfaWQpCmBgYAoKCg==