title: “Variant calling tutorial” output: html_notebook
#script for Variant calling tutorial
#Calling variants in reads mapped by bowtie
#Load SAMtools
#Prepare your directories
#Index the FASTA reference file
#Convert mapped reads from SAM to BAM, sort, and index
#Call genome variants
#Calling variants in reads mapped by BWA or Bowtie2
#Filtering VCF files with grep
#Comparing the results of different mappers using bedtools
module load samtools
samtools
which samtools
mkdir samtools_bowtie
cp bowtie/SRR030257.sam samtools_bowtie
cp bowtie/NC_012967.1.fasta samtools_bowtie
#Index the FASTA reference file
#First, you need to index the reference file. (This isn't indexing it for read mapping. It's indexing it so that SAMtools can quickly jump to a certain base in the reference.)Then run this command to index the reference file.
samtools faidx samtools_bowtie/NC_012967.1.fasta
#Take a look at the new *.fai file that was created by this command.
less samtools_bowtie/NC_012967.1.fasta.fai
#Convert mapped reads from SAM to BAM, sort, and index
#SAM is a text file, so it is slow to access information about how any given read was mapped. SAMtools and many of the commands that we will run later work on BAM files (essentially GZIP compressed binary forms of the text SAM files).
samtools view -b -S -o samtools_bowtie/SRR030257.bam samtools_bowtie/SRR030257.sam
#Sort and index the BAM file.
samtools sort samtools_bowtie/SRR030257.bam samtools_bowtie/SRR030257.sorted
samtools index samtools_bowtie/SRR030257.sorted.bam
#Call genome variants
#we use the mpileup command from samtools to compile information about the bases mapped to each reference position.Output BCF file. This is a binary form of the text Variant Call Format (VCF).
samtools mpileup -u -f samtools_bowtie/NC_012967.1.fasta samtools_bowtie/SRR030257.sorted.bam > samtools_bowtie/SRR030257.bcf
#Convert BCF to human-readable VCF:
bcftools view -v -c -g samtools_bowtie/SRR030257.bcf > samtools_bowtie/SRR030257.vcf
#Comparing the results of different mappers using bedtools
mkdir comparison
cp samtools_bowtie/SRR030257.vcf comparison/bowtie.vcf
cp samtools_bwa/SRR030257.vcf comparison/bwa.vcf
cp samtools_bowtie2/SRR030257.vcf comparison/bowtie2.vcf
cd comparison
#load module
module load bedtools
#Finding common mutations.
bedtools intersect -a bowtie.vcf -b bwa.vcf > common_bowtie_bwa.vcf
#Finding mutations that are unique for each mapper.
bedtools subtract -a bowtie.vcf -b common_bowtie_bwa.vcf > unique_bowtie.vcf
bedtools subtract -a bwa.vcf -b common_bowtie_bwa.vcf > unique_bwa.vcf
LS0tCm91dHB1dDoKICBodG1sX25vdGVib29rOiBkZWZhdWx0CiAgaHRtbF9kb2N1bWVudDogZGVmYXVsdAotLS0KLS0tCnRpdGxlOiAiVmFyaWFudCBjYWxsaW5nIHR1dG9yaWFsIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKCgpgYGB7cn0KI3NjcmlwdCBmb3IgVmFyaWFudCBjYWxsaW5nIHR1dG9yaWFsCiNDYWxsaW5nIHZhcmlhbnRzIGluIHJlYWRzIG1hcHBlZCBieSBib3d0aWUKI0xvYWQgU0FNdG9vbHMKI1ByZXBhcmUgeW91ciBkaXJlY3RvcmllcwojSW5kZXggdGhlIEZBU1RBIHJlZmVyZW5jZSBmaWxlCiNDb252ZXJ0IG1hcHBlZCByZWFkcyBmcm9tIFNBTSB0byBCQU0sIHNvcnQsIGFuZCBpbmRleAojQ2FsbCBnZW5vbWUgdmFyaWFudHMKI0NhbGxpbmcgdmFyaWFudHMgaW4gcmVhZHMgbWFwcGVkIGJ5IEJXQSBvciBCb3d0aWUyCiNGaWx0ZXJpbmcgVkNGIGZpbGVzIHdpdGggZ3JlcAojQ29tcGFyaW5nIHRoZSByZXN1bHRzIG9mIGRpZmZlcmVudCBtYXBwZXJzIHVzaW5nIGJlZHRvb2xzCm1vZHVsZSBsb2FkIHNhbXRvb2xzCnNhbXRvb2xzCndoaWNoIHNhbXRvb2xzCm1rZGlyIHNhbXRvb2xzX2Jvd3RpZQpjcCBib3d0aWUvU1JSMDMwMjU3LnNhbSBzYW10b29sc19ib3d0aWUKY3AgYm93dGllL05DXzAxMjk2Ny4xLmZhc3RhIHNhbXRvb2xzX2Jvd3RpZQojSW5kZXggdGhlIEZBU1RBIHJlZmVyZW5jZSBmaWxlCiNGaXJzdCwgeW91IG5lZWQgdG8gaW5kZXggdGhlIHJlZmVyZW5jZSBmaWxlLiAoVGhpcyBpc24ndCBpbmRleGluZyBpdCBmb3IgcmVhZCBtYXBwaW5nLiBJdCdzIGluZGV4aW5nIGl0IHNvIHRoYXQgU0FNdG9vbHMgY2FuIHF1aWNrbHkganVtcCB0byBhIGNlcnRhaW4gYmFzZSBpbiB0aGUgcmVmZXJlbmNlLilUaGVuIHJ1biB0aGlzIGNvbW1hbmQgdG8gaW5kZXggdGhlIHJlZmVyZW5jZSBmaWxlLgoKc2FtdG9vbHMgZmFpZHggc2FtdG9vbHNfYm93dGllL05DXzAxMjk2Ny4xLmZhc3RhCiNUYWtlIGEgbG9vayBhdCB0aGUgbmV3ICouZmFpIGZpbGUgdGhhdCB3YXMgY3JlYXRlZCBieSB0aGlzIGNvbW1hbmQuCmxlc3Mgc2FtdG9vbHNfYm93dGllL05DXzAxMjk2Ny4xLmZhc3RhLmZhaQoKI0NvbnZlcnQgbWFwcGVkIHJlYWRzIGZyb20gU0FNIHRvIEJBTSwgc29ydCwgYW5kIGluZGV4CiNTQU0gaXMgYSB0ZXh0IGZpbGUsIHNvIGl0IGlzIHNsb3cgdG8gYWNjZXNzIGluZm9ybWF0aW9uIGFib3V0IGhvdyBhbnkgZ2l2ZW4gcmVhZCB3YXMgbWFwcGVkLiBTQU10b29scyBhbmQgbWFueSBvZiB0aGUgY29tbWFuZHMgdGhhdCB3ZSB3aWxsIHJ1biBsYXRlciB3b3JrIG9uIEJBTSBmaWxlcyAoZXNzZW50aWFsbHkgR1pJUCBjb21wcmVzc2VkIGJpbmFyeSBmb3JtcyBvZiB0aGUgdGV4dCBTQU0gZmlsZXMpLgpzYW10b29scyB2aWV3IC1iIC1TIC1vIHNhbXRvb2xzX2Jvd3RpZS9TUlIwMzAyNTcuYmFtIHNhbXRvb2xzX2Jvd3RpZS9TUlIwMzAyNTcuc2FtCiNTb3J0IGFuZCBpbmRleCB0aGUgQkFNIGZpbGUuCnNhbXRvb2xzIHNvcnQgc2FtdG9vbHNfYm93dGllL1NSUjAzMDI1Ny5iYW0gc2FtdG9vbHNfYm93dGllL1NSUjAzMDI1Ny5zb3J0ZWQKc2FtdG9vbHMgaW5kZXggc2FtdG9vbHNfYm93dGllL1NSUjAzMDI1Ny5zb3J0ZWQuYmFtCiNDYWxsIGdlbm9tZSB2YXJpYW50cwojd2UgdXNlIHRoZSBtcGlsZXVwIGNvbW1hbmQgZnJvbSBzYW10b29scyB0byBjb21waWxlIGluZm9ybWF0aW9uIGFib3V0IHRoZSBiYXNlcyBtYXBwZWQgdG8gZWFjaCByZWZlcmVuY2UgcG9zaXRpb24uT3V0cHV0IEJDRiBmaWxlLiBUaGlzIGlzIGEgYmluYXJ5IGZvcm0gb2YgdGhlIHRleHQgVmFyaWFudCBDYWxsIEZvcm1hdCAoVkNGKS4KCnNhbXRvb2xzIG1waWxldXAgLXUgLWYgc2FtdG9vbHNfYm93dGllL05DXzAxMjk2Ny4xLmZhc3RhIHNhbXRvb2xzX2Jvd3RpZS9TUlIwMzAyNTcuc29ydGVkLmJhbSA+IHNhbXRvb2xzX2Jvd3RpZS9TUlIwMzAyNTcuYmNmCiNDb252ZXJ0IEJDRiB0byBodW1hbi1yZWFkYWJsZSBWQ0Y6CiAgYmNmdG9vbHMgdmlldyAtdiAtYyAtZyBzYW10b29sc19ib3d0aWUvU1JSMDMwMjU3LmJjZiA+IHNhbXRvb2xzX2Jvd3RpZS9TUlIwMzAyNTcudmNmCiAgCiAgI0NvbXBhcmluZyB0aGUgcmVzdWx0cyBvZiBkaWZmZXJlbnQgbWFwcGVycyB1c2luZyBiZWR0b29scwogIG1rZGlyIGNvbXBhcmlzb24KY3Agc2FtdG9vbHNfYm93dGllL1NSUjAzMDI1Ny52Y2YgY29tcGFyaXNvbi9ib3d0aWUudmNmCmNwIHNhbXRvb2xzX2J3YS9TUlIwMzAyNTcudmNmIGNvbXBhcmlzb24vYndhLnZjZgpjcCBzYW10b29sc19ib3d0aWUyL1NSUjAzMDI1Ny52Y2YgY29tcGFyaXNvbi9ib3d0aWUyLnZjZgpjZCBjb21wYXJpc29uCiAjbG9hZCBtb2R1bGUKIG1vZHVsZSBsb2FkIGJlZHRvb2xzCiNGaW5kaW5nIGNvbW1vbiBtdXRhdGlvbnMuCmJlZHRvb2xzIGludGVyc2VjdCAtYSBib3d0aWUudmNmIC1iIGJ3YS52Y2YgPiBjb21tb25fYm93dGllX2J3YS52Y2YKI0ZpbmRpbmcgbXV0YXRpb25zIHRoYXQgYXJlIHVuaXF1ZSBmb3IgZWFjaCBtYXBwZXIuCmJlZHRvb2xzIHN1YnRyYWN0IC1hIGJvd3RpZS52Y2YgLWIgY29tbW9uX2Jvd3RpZV9id2EudmNmID4gdW5pcXVlX2Jvd3RpZS52Y2YKYmVkdG9vbHMgc3VidHJhY3QgLWEgYndhLnZjZiAtYiBjb21tb25fYm93dGllX2J3YS52Y2YgPiB1bmlxdWVfYndhLnZjZgoKICAKCiAgCgoKYGBgCgo=