Rのsystemコマンドを使ってusearchを呼び出し実行する。 OTUテーブルを作製するところまでやっている。
usearch -fastq_mergepairs R1.fastq -reverse R2.fastq -relabel @ -fastqout merged.fqsuppressMessages(suppressWarnings(library(stringr)))
Sys.setenv(PATH = "~/program/usearch8.1.1861:/usr/local/opt/coreutils/libexec/gnubin")
# fastqファイルフルパス
fq <- list.files(".", pattern = "fastq", full.names = T)
# メタデータ
meta <- read.table("PRJEB4224.txt", header = T, sep = "\t", stringsAsFactors = F)
# ファイル名プレフィックス
smp <- unique(str_sub(unlist(lapply(strsplit(fq, "/"), function(x){tail(x, n=1)})), 1, 9))
# check
meta[1:3, 1:10]
## study_accession sample_accession secondary_sample_accession
## 1 PRJEB4224 SAMEA2173964 ERS342022
## 2 PRJEB4224 SAMEA2173965 ERS342023
## 3 PRJEB4224 SAMEA2173966 ERS342024
## experiment_accession run_accession tax_id scientific_name
## 1 ERX306486 ERR333581 1118232 root metagenome
## 2 ERX306487 ERR333582 1118232 root metagenome
## 3 ERX306488 ERR333583 1118232 root metagenome
## instrument_model library_name library_layout
## 1 Illumina MiSeq RunB PAIRED
## 2 Illumina MiSeq RunB PAIRED
## 3 Illumina MiSeq RunB PAIRED
smp[1:3]
## [1] "ERR333581" "ERR333582" "ERR333583"
usearch -fastq_mergepairs R1.fastq -reverse R2.fastq -relabel @ -fastqout merged.fq#pb <- txtProgressBar(min = 1, max = length(smp), style = 3) # プログレスバー
for(i in 1:length(smp)){
comp1 <- paste("usearch",
"-fastq_mergepairs", paste0(smp[i], "_1.fastq"),
"-reverse", paste0(smp[i], "_2.fastq"),
"-relabel @",
"-fastqout", paste0(smp[i], ".merged.fq >> merge.log 2>&1")
)
system(comp1, intern = F, wait = T)
#setTxtProgressBar(pb, i)
}
usearch -fastq_filter merged.fq -fastq_maxee 1.0 -relabel Filt -fastaout filtered.fa## オプションの値
truncq <- 30 # fastq_truncqual
minlen <- 200 # fastq_minlen
maxee <- 0.25 # fastq_maxee (厳しくするなら、0.5~0.25推奨)
#pb <- txtProgressBar(min = 1, max = length(smp), style = 3) #
for(i in 1:length(smp)){
comp2 <- paste("usearch",
"-fastq_filter", paste0(smp[i], ".merged.fq"),
"-fastq_truncqual", truncq,
"-fastq_minlen", minlen,
"-fastq_maxee 0.25",
"-relabel Filt",
"-fastaout", paste0(smp[i], ".flt.fa >> flt.log 2>&1")
)
system(comp2, intern = F, wait = T)
#setTxtProgressBar(pb, i)
}
usearch -derep_fulllength filtered.fa -relabel Uniq -sizeout -fastaout derep.fa# フィルタ済みリード(fasta)を結合 ----
fltfa <- list.files(".", "*.flt.fa")
com3 <- paste("cat", paste(fltfa, collapse = " "), ">", "./filterd.fa")
system(com3, intern = F, wait = T)
# Dereplication コマンド----
com4 <- "usearch -derep_fulllength filterd.fa -relabel Uniq -sizeout -fastaout derep.fa > derep.log 2>&1"
system(com4, intern = F, wait = T)
usearch -sortbysize derep.fa -fastaout sorted.fa -minsize 2com5 <- "usearch -sortbysize derep.fa -fastaout sorted.fa -minsize 2 > sort.log 2>&1"
system(com5, intern = F, wait = T)
usearch -cluster_otus derep.fa -minsize 2 -otus otus.fa -relabel Otucom6 <- "usearch -cluster_otus sorted.fa -minsize 2 -otus otus.fa -relabel Otu > cluster.log 2>&1"
system(com6, intern = F, wait = T) # 15484
usearch -uchime_ref otus.fa -db gold.fa -uchimeout re_uchime.tab -strand plus# uchimedb <-"~/db/meta16s/gold.fa"
# com7 <- paste("usearch -uchime_ref otus.fa -db", uchimedb, "-uchimeout otus_uchi.tab -strand plus > chimref.log")
# system(com7, intern = F, wait = T) # 28700
usearch -makeudb_utax 16s_ref.fa -output 16s_ref.udb -report 16s_report.txtusearch -utax otus.fa -db 16s_ref.udb -strand both -fastaout otu_tax.fa >> utax.log 2>&1"udb <- "~/db/meta16s/rdp_16s_trainset15/fasta/16s_ref.udb" # UDBデータベース
com8 <- paste("usearch -utax otus.fa -db", udb,
"-strand both",
"-fastaout otus_tax.fa",
">> utax.log 2>&1")
system(com8, intern = F, wait = T)
usearch -usearch_global merged.fq -db otu_tax.fa -strand plus -id 0.97 -otutabout otu.tab -biomout otutab.json#pb <- txtProgressBar(min = 1, max = length(smp), style = 3)
for(i in 1:length(smp)){
comp9 <- paste("usearch -usearch_global", paste0(smp[i],".merged.fq"),
"-db otus_tax.fa",
"-strand plus",
"-id 0.97", # exact matchの場合は1.0
"-otutabout", paste0(smp[i], ".otu.tab"),
"-biomout", paste0(smp[i],".otutab.json"),
">> usearch.log 2>&1")
system(comp9, intern = F, wait = T)
#setTxtProgressBar(pb, i)
}