Jiang Li, MS
# One-time setup: source Bioconductor's biocLite.R installer script and call
# biocLite() for the SRAdb package.
# NOTE(review): biocLite appears to have been superseded by
# BiocManager::install() in current Bioconductor releases; confirm before
# re-running this step.
source("http://bioconductor.org/biocLite.R")
biocLite("SRAdb")
A detailed description of SRA is available at http://www.ncbi.nlm.nih.gov/books/NBK7522/. Data in the SRA are classified into a hierarchy of Studies, Experiments, Samples, and their corresponding Runs. Studies have an overall goal and may comprise several Experiments. An Experiment describes specifically what was sequenced and the method used. It includes information about the source of the DNA, the Sample, the sequencing platform, and the processing of the data. Each Experiment is made up of one or more instrument Runs. A Run contains the results or reads from each spot in the instrument run.
suppressPackageStartupMessages(library(SRAdb))
suppressPackageStartupMessages(library(DBI))
# Download the SQLite file online; this takes some time. The default
# storage location is the current working directory and the default
# file name is SRAmetadb.sqlite.
# To save time, we have already downloaded it.
# sqlfile <- getSRAdbFile()
sqlfile <- "SRAmetadb.sqlite"
# dbConnect() on a path that does not exist creates a new, empty SQLite
# database, so a missing file would only surface later as "no such table".
# Fail early with a clear message instead.
if (!file.exists(sqlfile)) {
  stop("SRA metadata file not found: ", sqlfile,
       " (download it with getSRAdbFile())", call. = FALSE)
}
sra_con <- dbConnect(SQLite(), sqlfile)
# See all the tables in the SQLite database
sra_tables <- dbListTables(sra_con)
sra_tables
## [1] "col_desc" "experiment" "metaInfo"
## [4] "run" "sample" "sra"
## [7] "sra_ft" "sra_ft_content" "sra_ft_segdir"
## [10] "sra_ft_segments" "study" "submission"
# suppressPackageStartupMessages(library('xtable'))
# Pull every experiment with its platform, then tabulate how many
# experiments were recorded per platform.
sql <- "select experiment_accession, platform from experiment"
res <- dbGetQuery(sra_con, sql)
platform.experiment <- setNames(
  as.data.frame(table(res[["platform"]])),
  c("Platform", "Number of experiment")
)
print(platform.experiment)
## Platform Number of experiment
## 1 ABI_SOLID 4234
## 2 COMPLETE_GENOMICS 369
## 3 HELICOS 291
## 4 ILLUMINA 108641
## 5 ION_TORRENT 33
## 6 LS454 19947
## 7 PACBIO_SMRT 289
suppressPackageStartupMessages(library("googleVis"))
## Warning: A specification for S3 class "connection" in package 'RJSONIO'
## seems equivalent to one from package 'graph' and is not turning on
## duplicate class definitions for this class
## Warning: A specification for S3 class "file" in package 'RJSONIO' seems
## equivalent to one from package 'graph' and is not turning on duplicate
## class definitions for this class
# Render a data frame as a paginated Google Visualization table and emit the
# chart markup via print(<gvis object>, "chart").
#
# Arguments:
#   data      data frame to display; handed to gvisTable().
#   width     value for the gvisTable "width" option.
#   height    value for the gvisTable "height" option.
#   pageSize  value for the gvisTable "pageSize" option (paging is enabled).
#   ...       only used when `data` is a base "table" object (see below).
#
# Because this function is named print.table, S3 dispatch of print() on an
# object of class "table" lands here instead of on base R's method. Such
# objects are handed back to base::print.table() so ordinary contingency
# tables keep printing normally while this helper is defined.
print.table <- function(data = NULL, width = 1000, height = 520, pageSize = 50,
                        ...) {
  if (inherits(data, "table")) {
    return(base::print.table(data, ...))
  }
  PopTable <- gvisTable(data, options = list(width = width, height = height,
                                             pageSize = pageSize,
                                             page = "enable"))
  print(PopTable, "chart")
}
# Report the human (taxon_id '9606') rows of the `sra` table for one platform.
#
# Arguments:
#   con       DBI connection to the SRA metadata database.
#   platform  single platform name, matched exactly against sra.platform.
#
# When rows are found:
#   * adds a Read.length column (bases / spots, halved for rows whose
#     library_layout contains "PAIRED"),
#   * writes the rows to platform_<platform>_info.csv in the working directory,
#   * turns each run accession into an NCBI SRA search link and displays the
#     result with print.table().
# When nothing matches, prints "No result!".
get.platform.related.info <- function(con = NULL, platform) {
  stopifnot(length(platform) == 1L, !is.na(platform))

  # The platform value is bound as a parameter rather than pasted into the
  # SQL text, so names containing quotes cannot break or alter the query.
  sql <- paste0(
    "select run_accession,sample_accession,experiment_accession,",
    "study_accession, spots, bases, experiment_title, study_name,",
    "library_layout,adapter_spec, platform,instrument_model, taxon_id ",
    "from sra where taxon_id='9606' and platform=?"
  )
  res <- dbGetQuery(con, sql, params = list(as.character(platform)))

  if (nrow(res) > 0) {
    # Bases per spot; a paired-layout spot holds two reads, so halve it.
    read.len <- res$bases / res$spots
    is.paired <- grepl("PAIRED", res$library_layout)
    read.len[is.paired] <- read.len[is.paired] / 2
    res <- cbind(res, Read.length = read.len)

    # Save the plain accessions before they are replaced by HTML links.
    outfile <- paste0("platform_", platform, "_info.csv")
    write.csv(res, outfile)

    res[, 1] <- paste0("<a href='http://www.ncbi.nlm.nih.gov/sra?term=",
                       res[, 1], "'>", res[, 1], "</a>")
    print.table(res, width = 1800)
  } else {
    cat("No result!")
  }
}
# Produce the per-platform report for three platforms, one call each.
platform <- "HELICOS"
get.platform.related.info(sra_con, platform)
platform <- "ION_TORRENT"
get.platform.related.info(sra_con, platform)
platform <- "PACBIO_SMRT"
get.platform.related.info(sra_con, platform)
No records