Contents

1 COAD from curatedTCGAData

# Attach curatedTCGAData without echoing its startup banner.
suppressPackageStartupMessages(library(curatedTCGAData))

# Retrieve the COAD assays whose names match "miRNA*" from data version
# 1.1.38. dry.run = FALSE so the data are actually fetched; messages emitted
# during the call are silenced.
coad <- suppressMessages(curatedTCGAData(
    diseaseCode = "COAD",
    assays = "miRNA*",
    version = "1.1.38",
    dry.run = FALSE
))

2 Extract rownames from miRNA assay

# Pull the miRNA-seq experiment out of `coad` by its assay name, then keep
# (and display) its last six row names as a small test set.
mir <- coad[["COAD_miRNASeqGene-20160128"]]
(test_mirs <- tail(rownames(mir), n = 6L))
## [1] "hsa-mir-944" "hsa-mir-95"  "hsa-mir-96"  "hsa-mir-98"  "hsa-mir-99a"
## [6] "hsa-mir-99b"

3 Try miRBaseConverter

# Attach miRBaseConverter quietly, then check which miRBase releases the
# test names are consistent with (prints the per-release match table).
suppressPackageStartupMessages(library(miRBaseConverter))
checkMiRNAVersion(test_mirs)
##    Version Proportion           Recommend
## 1       v6     83.33%                    
## 2     v7_1     83.33%                    
## 3       v8     83.33%                    
## 4     v8_1     83.33%                    
## 5     v8_2     83.33%                    
## 6       v9     83.33%                    
## 7     v9_1     83.33%                    
## 8     v9_2     83.33%                    
## 9      v10       100%  ***BEST Matched***
## 10   v10_1       100%  ***BEST Matched***
## 11     v11       100%  ***BEST Matched***
## 12     v12       100%  ***BEST Matched***
## 13     v13       100%  ***BEST Matched***
## 14     v14       100%  ***BEST Matched***
## 15     v15       100%  ***BEST Matched***
## 16     v16       100%  ***BEST Matched***
## 17     v17       100%  ***BEST Matched***
## 18     v18       100%  ***BEST Matched***
## 19     v19       100%  ***BEST Matched***
## 20     v20       100%  ***BEST Matched***
## 21     v21       100%  ***BEST Matched***
## 22     v22       100%  ***BEST Matched***
## [1] "v22"
# Take the miRBase release from checkMiRNAVersion() itself instead of
# hard-coding it, so it stays in step with whatever names are in `test_mirs`
# (the call above returned "v22"). verbose = FALSE avoids printing the
# comparison table a second time.
version <- checkMiRNAVersion(test_mirs, verbose = FALSE)

4 Get accessions

# Map each test miRNA name to its miRBase accession for the chosen release;
# the surrounding parentheses display the resulting table.
(mirna_info <- miRNA_NameToAccession(test_mirs, version = version))
##   miRNAName_v22 Accession
## 1   hsa-mir-944 MI0005769
## 2    hsa-mir-95 MI0000097
## 3    hsa-mir-96 MI0000098
## 4    hsa-mir-98 MI0000100
## 5   hsa-mir-99a MI0000101
## 6   hsa-mir-99b MI0000746
# Keep only the accession column of the lookup table and display it.
(mirna_accessions <- mirna_info$Accession)
## [1] "MI0005769" "MI0000097" "MI0000098" "MI0000100" "MI0000101" "MI0000746"

5 Get Annotated GRanges for GRCh38

# Attach AnnotationHub quietly and open a hub connection.
suppressPackageStartupMessages(library(AnnotationHub))
ah <- AnnotationHub()

# Search the hub for human EnsDb records and load the last one listed.
# NOTE(review): this presumes the last record is the most recent Ensembl
# release -- confirm against the printed summary below.
ens_db_query <- query(ah, c("EnsDb", "Homo sapiens"))
last_record <- tail(names(ens_db_query), n = 1L)
edb <- ens_db_query[[last_record]]
## loading from cache
## require("ensembldb")
edb
## EnsDb for Ensembl:
## |Backend: SQLite
## |Db type: EnsDb
## |Type of Gene ID: Ensembl Gene ID
## |Supporting package: ensembldb
## |Db created by: ensembldb package from Bioconductor
## |script_version: 0.3.10
## |Creation time: Sat Oct 26 21:34:14 2024
## |ensembl_version: 113
## |ensembl_host: 127.0.0.1
## |Organism: Homo sapiens
## |taxonomy_id: 9606
## |genome_build: GRCh38
## |DBSCHEMAVERSION: 2.2
## |common_name: human
## |species: homo_sapiens
## | No. of genes: 87726.
## | No. of transcripts: 413674.
## |Protein data available.
# Extract gene-level ranges from the EnsDb, restricted to genes whose
# biotype is "miRNA", and display them.
all_mirnas <- ensembldb::genes(
    edb,
    filter = GeneBiotypeFilter("miRNA")
)
all_mirnas
## GRanges object with 1945 ranges and 9 metadata columns:
##                   seqnames              ranges strand |         gene_id
##                      <Rle>           <IRanges>  <Rle> |     <character>
##   ENSG00000278267        1         17369-17436      - | ENSG00000278267
##   ENSG00000284332        1         30366-30503      + | ENSG00000284332
##   ENSG00000273874        1       187891-187958      - | ENSG00000273874
##   ENSG00000278791        1       632325-632413      - | ENSG00000278791
##   ENSG00000207730        1     1167104-1167198      + | ENSG00000207730
##               ...      ...                 ...    ... .             ...
##   ENSG00000221533        X 154887360-154887458      - | ENSG00000221533
##   ENSG00000221190        X 155383100-155383198      - | ENSG00000221190
##   ENSG00000221603        X 155457517-155457615      + | ENSG00000221603
##   ENSG00000292355        Y     1293918-1293992      + | ENSG00000292355
##   ENSG00000292346        Y     2609191-2609254      + | ENSG00000292346
##                     gene_name gene_biotype seq_coord_system
##                   <character>  <character>      <character>
##   ENSG00000278267   MIR6859-1        miRNA       chromosome
##   ENSG00000284332   MIR1302-2        miRNA       chromosome
##   ENSG00000273874   MIR6859-2        miRNA       chromosome
##   ENSG00000278791                    miRNA       chromosome
##   ENSG00000207730     MIR200B        miRNA       chromosome
##               ...         ...          ...              ...
##   ENSG00000221533   MIR1184-1        miRNA       chromosome
##   ENSG00000221190   MIR1184-2        miRNA       chromosome
##   ENSG00000221603   MIR1184-3        miRNA       chromosome
##   ENSG00000292355     MIR3690        miRNA       chromosome
##   ENSG00000292346     MIR6089        miRNA       chromosome
##                              description   gene_id_version canonical_transcript
##                              <character>       <character>          <character>
##   ENSG00000278267 microRNA 6859-1 [Sou.. ENSG00000278267.1      ENST00000619216
##   ENSG00000284332 microRNA 1302-2 [Sou.. ENSG00000284332.1      ENST00000607096
##   ENSG00000273874 microRNA 6859-2 [Sou.. ENSG00000273874.1      ENST00000612080
##   ENSG00000278791                        ENSG00000278791.1      ENST00000621981
##   ENSG00000207730 microRNA 200b [Sourc.. ENSG00000207730.3      ENST00000384997
##               ...                    ...               ...                  ...
##   ENSG00000221533 microRNA 1184-1 [Sou.. ENSG00000221533.1      ENST00000408606
##   ENSG00000221190 microRNA 1184-2 [Sou.. ENSG00000221190.1      ENST00000408263
##   ENSG00000221603 microRNA 1184-3 [Sou.. ENSG00000221603.1      ENST00000408676
##   ENSG00000292355 microRNA 3690 [Sourc.. ENSG00000292355.1      ENST00000711140
##   ENSG00000292346 microRNA 6089 [Sourc.. ENSG00000292346.1      ENST00000711167
##                        symbol  entrezid
##                   <character>    <list>
##   ENSG00000278267   MIR6859-1 102466751
##   ENSG00000284332   MIR1302-2 100302278
##   ENSG00000273874   MIR6859-2 102465909
##   ENSG00000278791                  <NA>
##   ENSG00000207730     MIR200B    406984
##               ...         ...       ...
##   ENSG00000221533   MIR1184-1 100302111
##   ENSG00000221190   MIR1184-2 100422985
##   ENSG00000221603   MIR1184-3 100422977
##   ENSG00000292355     MIR3690 100500894
##   ENSG00000292346     MIR6089 102464837
##   -------
##   seqinfo: 61 sequences from GRCh38 genome

6 No matches found

# Why the accession lookup at the bottom of this chunk finds nothing: the
# `symbol` column holds gene symbols such as "MIR200B" or "MIR6859-1", while
# `mirna_accessions` holds miRBase accessions such as "MI0005769". The two
# identifier namespaces never overlap, so `%in%` is FALSE for every range.
#
# Match on symbols instead by rewriting the miRBase precursor names:
#   "hsa-mir-99a"    -> "MIR99A"
#   "hsa-mir-1302-2" -> "MIR1302-2"  (hyphen after a digit is kept)
#   "hsa-let-7a-1"   -> "MIRLET7A1"  (hyphen after a letter is dropped)
# NOTE(review): this follows the usual HGNC pattern for microRNA genes; names
# that still fail to match should be checked by hand.
mirna_symbols <- sub("^[a-z]{3}-", "", test_mirs)
mirna_symbols <- sub("^mir-", "MIR", mirna_symbols)
mirna_symbols <- sub("^let-", "MIRLET", mirna_symbols)
mirna_symbols <- gsub("(?<=[A-Za-z])-(?=[0-9])", "", mirna_symbols, perl = TRUE)
mirna_symbols <- toupper(mirna_symbols)
matched_mirnas <- all_mirnas[mcols(all_mirnas)$symbol %in% mirna_symbols]

# Original accession-based lookup, kept as the displayed result to document
# the mismatch: it yields zero ranges.
all_mirnas[mcols(all_mirnas)$symbol %in% mirna_accessions]
## GRanges object with 0 ranges and 9 metadata columns:
##    seqnames    ranges strand |     gene_id   gene_name gene_biotype
##       <Rle> <IRanges>  <Rle> | <character> <character>  <character>
##    seq_coord_system description gene_id_version canonical_transcript
##         <character> <character>     <character>          <character>
##         symbol entrezid
##    <character>   <list>
##   -------
##   seqinfo: 61 sequences from GRCh38 genome