/CIBIO/sharedCM/projects/metagenomicassembly/agambiae/evaluation/results_megahit
Two samples
Both samples have Enterobacter Ag1, genome here,
New sample: Sample_zanzf150 with presence of Microsporidiae. No assembly was possible due to no enough contigs.
H1’s reconstruced Ag1 covered 99.something % of AG refgen, H2 only 67%.
Extract all Enterobacter sp. Ag1 and two Cedecea neteri contigs and store in a new fasta file ready annotable with PROKKA
python ~/pyphlan/fna_sss.py --select --ids <( egrep "Ag1|NODE_16_|NODE_91" /CIBIO/sharedCM/projects/metagenomicassembly/agambiae/evaluation/results_megahit/Sample_H1_2_140_out.txt | cut -f1 ) /CIBIO/sharedCM/projects/metagenomicassembly/agambiae/Sample_H1_2_140_megahit/contigs.fasta > /CIBIO/sharedCM/projects/agambie/H1/Enterobacter_Ag1_extracted.fasta
Download Mauve aligner, run the Move contig tool from Tools menu. Add refgen first and then the assembled genome. Last alignemnt folder contains the ordered contigs.
export PATH=/CIBIO/sharedCM/tools/prokka-1.11/bin:/CIBIO/sharedCM/tools/barrnap-0.7/bin:$PATH
prokka --setupdb
prokka /CIBIO/sharedCM/projects/agambie/H1/Enterobacter_Ag1_extracted.fasta --outdir /CIBIO/sharedCM/projects/agambie/H1/prokkaanotation --force --metagenome --cpus 30
while read line; do
rsync --copy-links --recursive --times --verbose rsync://$line /CIBIO/sharedCM/projects/agambie/NCBI_enterobacter/.
done < <(wget -qO - ftp://ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/assembly_summary_refseq.txt | grep Enterobacter | cut -f20 | sed 's/ftp:\/\///g')
mkdir corrected_gff parallel bp_genbank2gff3 {} --outdir - > /CIBIO/sharedCM/projects/agambie/corrected_gff/{.}.gff ::: /CIBIO/sharedCM/projects/agambie/NCBI_enterobacter/*/*gbff.gz
| Species | Strain | Assembly |
|---|---|---|
| Enterobacter sp. | Ag1 | GCA_000277545.1 |
| Cedecea neteri | ND14a | GCA_000758325.1 |
| Cedecea neteri | M006 | GCA_000758305.1 |
| Klebsiella michiganensis | RC10 | GCA_000963575.1 |
| Cedecea neteri | SSMD04 | GCA_000757825.1 |
| Cedecea neteri | NBRC 105707 | GCA_001571265.1 |
| Cedecea daviseae | DSM 4568 | GCA_000412335.2 |
| Enterobacter cloaceae | ATCC 13047 | GCA_000025565.1 |
| Escherichia coli | K12MG1655 | G001265845 |
| Pseudomonas aeroginosa | G000796765 | |
| Serratia marcensis | FGI94 | G000330865 |
| Yersinia pestis | CO92 | G000009065 |
| Pantoea agglomerans | P10c | G001288285 |
| Cronobacter sakazakii | 8399 | G000467775 |
| Enterobacter asburiae | ATCC 35953 | G001521715 |
| Enterobacter hormaechei | oharae | G000807405 |
| Enterobacter lignolyticus | SCF1 | G000164865 |
| Enterobacter soli | ATCC BAA-2102 | G001654845 |
| Enterobacter cancerogenus | M004 | G000773965 |
| Klebsiella pneumoniae | HS11286 | G000240185 |
| Salmonella enterica | LT2 | G000006945 |
| Shigella dysenteriae | Sd197 | G000012005 |
export PATH=/CIBIO/sharedCM/tools/Roary_git/bin:$PATH
export PERL5LIB=/CIBIO/sharedCM/tools/Roary_git/lib:$PERL5LIB
export PERL5LIB=/home/e.pasolli/perl5/lib/perl5:$PERL5LIB
roary /CIBIO/sharedCM/projects/agambie/NCBI_enterobacter/corrected_gff/*gff -p 50 -e -f /CIBIO/sharedCM/projects/agambie/roary_out/roary_out -f /CIBIO/sharedCM/projects/agambie/roary_out -g 1000000
FastTree -nt -gtr core_gene* > tree.nwk
coretree <- read.tree("/CIBIO/sharedCM/projects/agambie/H1/neargenomes/out_p80/tree.nwk.rerooted")
coretree$tip.label <- gsub("\\_"," ", coretree$tip.label)
p <- ggtree(coretree, aes(linesize=32)) +
geom_tiplab(fontface="italic", hjust = -0.03, family="sans-serif") +
geom_tippoint() +
geom_treescale()
p <- ggtree::rotate(p, 22)
svg("out.svg", width = 10, height = 10)
p
dev.off()
## png
## 2
No 16S gene from PROKKA annotation, so let’s build it from the reference and the reads!
bowtie2 alignment withe the reference genome
bowtie2 -a --no-unal --very-sensitive -x ../bowtie/btindex/Ag1 -1 /CIBIO/sharedCM/seq_internal/20140428_agambiae/Sample_H1_2_140/H1_2_140_R1.filteredpaired.artifactremoved.anscreen.fastq.bz2 -2 /CIBIO/sharedCM/seq_internal/20140428_agambiae/Sample_H1_2_140/H1_2_140_R2.filteredpaired.artifactremoved.anscreen.fastq.bz2 | samtools view -Sb - > H1.bamExtarction of the 16S region, 10th contig, 1447bp
samtools view -h H1.bam "AKXM01000010:1-1447" | samtools view -Sb - > 16S.bamConsensus calling on extracted reads
/CIBIO/sharedCM/tools/samtools/samtools_v1.3.1/bin/samtools mpileup -uf ../Ag1/AKXM01.1.fsa_nt 16S.bam | /CIBIO/sharedCM/tools/bcftools/bcftools_v1.3.1/bin/bcftools call -c | /CIBIO/sharedCM/tools/bcftools/bcftools_v1.3.1/bin/vcfutils.pl vcf2fq > 16S_consensus.fasta# system("biom add-metadata -i /CIBIO/sharedCM/projects/anofele/bue-fastq/otus/larvae_emerg.biom -o /CIBIO/sharedCM/projects/anofele/bue-fastq/larvae_emerg/larvae_emerg_wtax.biom -m /CIBIO/sharedCM/projects/anofele/bue-fastq/mapping_file.txt")
# system("biom convert -i /CIBIO/sharedCM/projects/anofele/bue-fastq/larvae_emerg/larvae_emerg_wtax.biom -o /CIBIO/sharedCM/projects/anofele/bue-fastq/larvae_emerg/larvae_emerg_wtax.txt --to-json --table-type \"OTU table\"")
# otu_table <- "/CIBIO/sharedCM/projects/anofele/bue-fastq/larvae_emerg/larvae_emerg_wtax.txt"
# mapping_file <- "/CIBIO/sharedCM/projects/anofele/bue-fastq/mapping_file.txt"
# tree_otu <- "/CIBIO/sharedCM/projects/anofele/bue-fastq/otus/rep_set.tre"
# rep_set <- "/CIBIO/sharedCM/projects/anofele/bue-fastq/otus/new_refseqs.fna"
#qiime_data <- import_biom(BIOMfilename = otu_table, treefilename = read_tree(tree_otu), refseqfilename = rep_set, refseqFunction = parse_taxonomy_default)
#plot_bar(filter_taxa(qiime_data, function(x) sum(x > 50) > (0.5*length(x)), TRUE), fill="Rank5")
taxonomy <- read.delim("/CIBIO/sharedCM/projects/anofele/bue-fastq/larvae_emerg/taxa/larvae_emerg_L5.txt", skip = 1)
rownames(taxonomy) <- taxonomy$X.OTU.ID
taxonomy <- taxonomy[,-1]
threshold <- 0.005
taxonomy[apply(taxonomy, 1, mean)<threshold,"phylum"] <- "Other"
taxonomy[apply(taxonomy[,-which(names(taxonomy)=="phylum")], 1, function(x) {mean(x, na.rm = T)})>threshold,"phylum"] <- rownames(taxonomy[apply(taxonomy[,-which(names(taxonomy)=="phylum")], 1, function(x) {mean(x, na.rm = T)})>threshold,])
x<-melt(taxonomy, id.vars = c("phylum"))
x$phylum <- sapply(sapply(lapply(x$phylum, strsplit, ";"),`[[`,1),tail, n=1)
x$phylum <- gsub("D_4__","",x$phylum)
x <- melt(dcast(x, phylum~variable, fun.aggregate = sum))
## Using phylum as id variables
x[,"stage"] <- sapply(x$variable, substr, 2,2)
# svg("taxonomy.svg", width = 11)
ggplot() +
geom_bar(data=x, aes(x=variable, y=value, fill=phylum), stat="identity") +
scale_fill_manual(values=c("#47004b","#00f7b7","#d10090","#01deca","#ed2300","#017adb","#3d6200","#ff83cf","#005f43","#ff8f7c","#003d96","#ffe89f","#1a000b","#0000CC","#4b0016","#dfc7ff"), name="Genus") +
facet_grid( ~ stage, scale="free", space = "free_x")
# dev.off()