Validation of Variants Identified in PoolSeq by Optimised High Resolution Melting Assay Design
S. Baldwin, S Thomson, M Pither-Joyce, K Wright, R Macknight, S Lui, B Warren, Z Dwight, J McCallum
in preparation
Tue Jun 24 09:11:49 2014
Generated using script https://code.google.com/p/popoolation2/wiki/Manual#cmh-test.pl
For CMH test details see: http://en.wikipedia.org/wiki/Cochran%E2%80%93Mantel%E2%80%93Haenszel_statistics
Sample Ordering and coding as follows:
| Number | Bam file | Phenotype | ACP267 Genotype |
|---|---|---|---|
| 1 | BoltA1_RG.bam | Bolt | AA |
| 2 | BoltA2_RG.bam | Bolt | AA |
| 3 | BoltA3_RG.bam | Bolt | AA |
| 4 | BoltB1_RG.bam | Bolt | BB |
| 5 | BoltB2_RG.bam | Bolt | BB |
| 6 | BoltB3_RG.bam | Bolt | BB |
| 7 | NonA1_RG.bam | Non-bolt | AA |
| 8 | NonA2_RG.bam | Non-bolt | AA |
| 9 | NonA3_RG.bam | Non-bolt | AA |
| 10 | NonB1_RG.bam | Non-bolt | BB |
| 11 | NonB2_RG.bam | Non-bolt | BB |
| 12 | NonB3_RG.bam | Non-bolt | BB |
CMH Test Comparisons were performed on a subsampled (https://code.google.com/p/popoolation2/wiki/Manual#subsample-synchronized.pl) sync files (resampled to 50 reads per sample per site) as follows:
perl /software/x86_64/popoolation2/cmh-test.pl --input ../31.subsample_sync/subsample.sync --output All_bolt_assoc.cmh --max-coverage 1000 --population 1-10,2-11,3-12,4-7,5-8,6-9
perl /software/x86_64/popoolation2/cmh-test.pl --input ../31.subsample_sync/subsample.sync --output ACP267_LD.cmh --max-coverage 1000 --population 1-4,2-5,3-6,7-10,8-11,9-12
perl /software/x86_64/popoolation2/cmh-test.pl --input ../31.subsample_sync/subsample.sync --output Bolt_notACP267_LD_assoc.cmh --max-coverage 1000 --population 1-7,2-8,3-9,4-10,5-11,6-12
column_names <-c("BoltA1_RG.bam",
"BoltA2_RG.bam",
"BoltA3_RG.bam",
"BoltB1_RG.bam",
"BoltB2_RG.bam",
"BoltB3_RG.bam",
"NonA1_RG.bam",
"NonA2_RG.bam",
"NonA3_RG.bam",
"NonB1_RG.bam",
"NonB2_RG.bam",
"NonB3_RG.bam")
all_bolt <- read.table('data_files/All_bolt_assoc.cmh',col.names=c("chrom","pos","ref_base",column_names,"pval"))
hist(all_bolt$pval,xlab='P value',main='All')
library(qvalue)
all_qobj <- qvalue(all_bolt$pval)
par(mar = rep(2, 4)) ## so qplot doesnt fail due to margin settings
qplot(all_qobj)
### Estimate p value cutoff for FDR= 0.01
FDR <- 0.01
pval_cutoff <-max(all_qobj$pvalues[all_qobj$qvalues <= FDR])
Using a pvalue cutoff of 9.0311 × 10-5
acp267_LD <- read.table('data_files/ACP267_LD.cmh',col.names=c("chrom","pos","ref_base",column_names,"pval"))
Bolt_notACP267 <-read.table('data_files/Bolt_notACP267_LD_assoc.cmh',col.names=c("chrom","pos","ref_base",column_names,"pval"))
combined <- cbind(all_bolt,acp267_LD$pval,Bolt_notACP267$pval)
names(combined)<- gsub("\\$",replacement="_",names(combined))
This provides data on 285596 SNPs
library(qqman)
## Warning: package 'qqman' was built under R version 3.0.3
qq(combined$pval,main="All")
qq(combined$acp267_LD_pval,main='ACP267 LD')
qq(combined$Bolt_notACP267_pval,main='Other Bolt')
by_contig <- aggregate(data=combined,cbind(pval,acp267_LD$pval,Bolt_notACP267$pval) ~ chrom,min)
colnames(by_contig) <- c("chrom", "all_pval","ACP267_pval","NotACP267_pval")
This provides data on 10714 contigs
Bolt_assoc_contigs <- by_contig[which((by_contig$all_pval < pval_cutoff) |
(by_contig$ACP267_pval < pval_cutoff )|
(by_contig$NotACP267_pval < pval_cutoff)),]
write.csv(Bolt_assoc_contigs,file='Bolt_assoc_contig_pvals.csv',row.names =FALSE)
write.table(Bolt_assoc_contigs$chrom,file='sig_contigs.txt',quote=FALSE,col.names=FALSE,row.names=FALSE)
A total of 1443 contigs have a signicant pvalue less than 9.0311 × 10-5
with(Bolt_assoc_contigs,pairs(~ all_pval +
ACP267_pval +
NotACP267_pval,
main=" CMH Pvalues\n significant contigs only"))
as described at http://www.ats.ucla.edu/stat/r/faq/venn.htm
Form summary of booleans-contigs with minimum p-value < FDR cutoff
library(limma)
by_contig$sig_all <- by_contig$all_pval < pval_cutoff#cos we only took sig hits for this frame
by_contig$sig_ACP267 <- by_contig$ACP267_pval < pval_cutoff
by_contig$sig_NotACP267 <- by_contig$NotACP267_pval < pval_cutoff
b <- vennCounts(by_contig[5:7])
vennDiagram(b,main=" full data set",cex=1,names=c('ALL','ACP267','NOT_ACP267'))
dev.off()
## null device
## 1
Export these
write.csv(Bolt_assoc_contigs,file='Bolt_assoc_contigs.csv',row.names=FALSE)
Show Session info
sessionInfo()
## R version 3.0.2 (2013-09-25)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
##
## locale:
## [1] LC_COLLATE=English_New Zealand.1252
## [2] LC_CTYPE=English_New Zealand.1252
## [3] LC_MONETARY=English_New Zealand.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_New Zealand.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] limma_3.18.13 qqman_0.1.1 qvalue_1.36.0
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.4 evaluate_0.5.5 formatR_0.10 htmltools_0.2.4
## [5] knitr_1.6 rmarkdown_0.2.46 stringr_0.6.2 tcltk_3.0.2
## [9] tools_3.0.2