Play with SRAdb

Jiang Li, MS


Install it

# Load the Bioconductor installer script and use it to install SRAdb.
# NOTE(review): biocLite() looks like the legacy Bioconductor installer;
# newer Bioconductor releases appear to use BiocManager::install("SRAdb")
# instead - confirm against the R/Bioconductor version in use.
source("http://bioconductor.org/biocLite.R")
biocLite("SRAdb")

Play with it

A detailed description of SRA is available at http://www.ncbi.nlm.nih.gov/books/NBK7522/. Data in the SRA are classified into a hierarchy of Studies, Experiments, Samples, and their corresponding Runs. Studies have an overall goal and may be comprised of several Experiments. An Experiment describes specifically what was sequenced and the method used. It includes information about the source of the DNA, the Sample, the sequencing platform, and the processing of the data. Each Experiment is made up of one or more instrument Runs. A Run contains the results or reads from each spot in the instrument run.

Tables in SRAdb

suppressPackageStartupMessages(library(SRAdb))
suppressPackageStartupMessages(library(DBI))

# Location of the SRA metadata SQLite file. Downloading it takes some time,
# so a copy that was already downloaded to the current working directory is
# reused here. To fetch a fresh copy instead (default location: the current
# working directory, default file name: SRAmetadb.sqlite), run:
# sqlfile <- getSRAdbFile()
sqlfile <- "SRAmetadb.sqlite"

# Connecting to a missing SQLite path silently creates an empty database,
# which makes every later query fail with "no such table"; fail early with
# an actionable message instead.
if (!file.exists(sqlfile)) {
    stop("SRA metadata file not found: ", sqlfile,
        ". Download it first with getSRAdbFile().", call. = FALSE)
}
sra_con <- dbConnect(SQLite(), sqlfile)

# See all the tables in the sqlite database
sra_tables <- dbListTables(sra_con)
sra_tables
##  [1] "col_desc"        "experiment"      "metaInfo"       
##  [4] "run"             "sample"          "sra"            
##  [7] "sra_ft"          "sra_ft_content"  "sra_ft_segdir"  
## [10] "sra_ft_segments" "study"           "submission"     

See how many experiments use each platform

# Count the experiments recorded for each sequencing platform.
# suppressPackageStartupMessages(library('xtable'))
sql <- "select experiment_accession, platform from experiment"
res <- dbGetQuery(sra_con, sql)
# One row per platform; the frequency table is relabelled for display.
platform.experiment <- setNames(
    as.data.frame(table(res[["platform"]])),
    c("Platform", "Number of experiment")
)
platform.experiment
##            Platform Number of experiment
## 1         ABI_SOLID                 4234
## 2 COMPLETE_GENOMICS                  369
## 3           HELICOS                  291
## 4          ILLUMINA               108641
## 5       ION_TORRENT                   33
## 6             LS454                19947
## 7       PACBIO_SMRT                  289

Functions to get platform related information

suppressPackageStartupMessages(library("googleVis"))
## Warning: A specification for S3 class "connection" in package 'RJSONIO'
## seems equivalent to one from package 'graph' and is not turning on
## duplicate class definitions for this class
## Warning: A specification for S3 class "file" in package 'RJSONIO' seems
## equivalent to one from package 'graph' and is not turning on duplicate
## class definitions for this class
# Render a data frame as a paged Google Visualization table (gvisTable) and
# print the "chart" part of the result so it can be embedded in a report.
#
# Args:
#   data:     data frame to display. Required, despite the NULL default.
#   width:    `width` option handed to gvisTable.
#   height:   `height` option handed to gvisTable.
#   pageSize: `pageSize` option handed to gvisTable (paging is enabled).
#   ...:      only used when `data` is a base "table" object, in which case
#             it is forwarded to base::print.table().
#
# Because of its name, this function is also picked up as the S3 print method
# for class "table" when it lives in the global environment. Real table
# objects are therefore handed back to base::print.table() so that printing
# the result of table() keeps working.
print.table <- function(data = NULL, width = 1000, height = 520, pageSize = 50,
    ...) {
    if (inherits(data, "table")) {
        return(base::print.table(data, ...))
    }
    if (is.null(data)) {
        stop("'data' must be supplied: a data frame to render as a table",
            call. = FALSE)
    }
    chart <- gvisTable(data, options = list(width = width, height = height,
        pageSize = pageSize, page = "enable"))
    print(chart, tag = "chart")
}

# Function to get platform related human experiment and runs
# Report the human (taxon_id 9606) runs stored for one sequencing platform.
#
# Queries the `sra` table for the matching runs, adds a `Read.length` column
# (bases / spots, halved for rows whose library_layout contains "PAIRED"),
# writes the result to "platform_<platform>_info.csv" in the working
# directory, and prints it through print.table() with each run accession
# turned into a link to the NCBI SRA site. Prints "No result!" when no row
# matches.
#
# Args:
#   con:      open DBI connection to the SRA metadata database. Required,
#             despite the NULL default.
#   platform: a single platform name as stored in the `platform` column,
#             e.g. "HELICOS".
get.platform.related.info <- function(con = NULL, platform) {
    if (is.null(con)) {
        stop("'con' must be an open database connection", call. = FALSE)
    }
    platform <- as.character(platform)
    if (length(platform) != 1L || is.na(platform)) {
        stop("'platform' must be a single platform name", call. = FALSE)
    }

    columns <- c("run_accession", "sample_accession", "experiment_accession",
        "study_accession", "spots", "bases", "experiment_title", "study_name",
        "library_layout", "adapter_spec", "platform", "instrument_model",
        "taxon_id")
    # Bind the platform as a query parameter instead of pasting it into the
    # SQL text, so quotes in the value cannot break or alter the statement.
    sql <- paste0("select ", paste(columns, collapse = ", "),
        " from sra where taxon_id='9606' and platform = ?")
    res <- dbGetQuery(con, sql, params = list(platform))

    if (nrow(res) == 0) {
        cat("No result!")
        return(invisible(NULL))
    }

    # Average read length per run; paired layouts count both mates in
    # `bases`, so their per-spot length is halved.
    read.len <- res$bases / res$spots
    paired <- grepl("PAIRED", res$library_layout, fixed = TRUE)
    read.len[paired] <- read.len[paired] / 2
    res$Read.length <- read.len

    # Save the plain data before the accession column is turned into HTML.
    outfile <- paste0("platform_", platform, "_info.csv")
    write.csv(res, outfile)

    res$run_accession <- paste0(
        "<a href='http://www.ncbi.nlm.nih.gov/sra?term=", res$run_accession,
        "'>", res$run_accession, "</a>")
    print.table(res, width = 1800)
}

HeliScope platform experiment and runs (Human)

# Report the runs stored for the HELICOS platform.
platform <- "HELICOS"
get.platform.related.info(con = sra_con, platform = platform)

Ion Torrent platform experiment and runs (Human)

# Report the runs stored for the ION_TORRENT platform.
platform <- "ION_TORRENT"
get.platform.related.info(con = sra_con, platform = platform)

PacBio platform experiment and runs (Human)

# Report the runs stored for the PACBIO_SMRT platform.
platform <- "PACBIO_SMRT"
get.platform.related.info(con = sra_con, platform = platform)

Starlight platform experiment and runs (Human)

No records