Libraries
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.3.2
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Warning: package 'ggplot2' was built under R version 3.3.2
## Warning: package 'tibble' was built under R version 3.3.2
## Warning: package 'tidyr' was built under R version 3.3.2
## Warning: package 'readr' was built under R version 3.3.2
## Warning: package 'purrr' was built under R version 3.3.2
## Warning: package 'dplyr' was built under R version 3.3.2
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
library(admixturegraph)
## Warning: package 'admixturegraph' was built under R version 3.3.2
source("plotting_funcs.R")
Functions to generate diploid data, remove monomorphic loci
gendata<-function(migration_command,n,ninds){
hapdat<-matrix(unlist(lapply(strsplit(system(paste("mspms ", n*ninds, " 1 -t 450 -r 450 450 -I ", n, " ", paste(rep(ninds,n),collapse=" ") , " ", migration_command, " | tail -", n*ninds,sep=""),intern=T),''),as.numeric)),nrow=n*ninds,byrow=T)
genodat<-hapdat[seq(1,n*ninds,2),]+hapdat[seq(2,n*ninds,2),] # make diploid
write.table(t(genodat),file="~/crap.geno", quote=F, row.names=F, col.names=F, sep="") #write genotype file
return(genodat)
}
'%!in%' <- function(x,y)!('%in%'(x,y)) # "not a member of"" function
poly<-function(x){ if(x>0 && x<1){ return(TRUE)} else{return(FALSE)}} # find polymorphic loci
f4_calc<-function(perms,list_of_freqs,locus){
freqs<-sapply(perms,function(z) list_of_freqs[[z]][locus])
f4_freqs=freqs[perm]
return((f4_freqs[1]-f4_freqs[2])*(f4_freqs[3]-f4_freqs[4]))
}
Simulate
npops=6
ninds=20
kpops=6
#model no migration
#migration_command=paste("-n 4 0.8 -n 5 0.1 -n 6 0.8 -g 3 5.0 -ej 0.04 5 4 -ej 0.06 4 3 -es 0.07 6 0.8 -ej 0.07 6 3 -ej 0.07 7 2 -ej 0.1 3 1 -ej 0.6 2 1")
#model with migration
#migration_command=paste("-n 4 0.8 -n 5 0.1 -n 6 0.8 -m 4 3 50.0 -m 3 4 50.0 -m 2 6 50.0 -g 3 5.0 -ej 0.04 5 4 -ej 0.06 4 3 -es 0.07 6 0.8 -ej 0.07 6 3 -ej 0.07 7 2 -ej 0.1 3 1 -ej 0.6 2 1")
#model migration with no pop split
#migration_command=paste("-n 4 0.8 -n 5 0.1 -n 6 0.8 -m 4 3 50.0 -m 3 4 50.0 -m 2 6 100.0 -g 3 5.0 -ej 0.04 5 4 -ej 0.06 4 3 -ej 0.07 6 3 -ej 0.1 3 1 -ej 0.6 2 1")
#nothing
migration_command=paste("-n 4 0.8 -n 5 0.1 -n 6 0.8 -m 2 6 20.0 -g 3 5.0 -ej 0.04 5 4 -ej 0.06 4 3 -ej 0.07 6 3 -ej 0.1 3 1 -ej 0.6 2 1")
mydat<-gendata(migration_command,npops,ninds)
Make distance matrix
nloci=dim(mydat)[2]
ibs_mat<-as.matrix(dist(mydat,method="manhattan",diag=TRUE,upper=TRUE)/nloci)
pops=c("PARV","MEX","ML","SL","SH","MH")
for_sil=data.frame(paste(1:60,"@",c(sapply(pops,function(j) rep(j,10))),sep=""),ibs_mat)
write.table(for_sil,file="sim_dist_matrix.txt",col.names=FALSE,row.names=FALSE,quote=FALSE,sep="\t")
Calculate silhoutte scores, make graphs
system("perl silhouette.pl -i sim_dist_matrix.txt -o temp -g")
Get individuals cut do to silhouett scores
#read in silscores
tempcut<-data.frame(read.table("temp_ind.sil",header=F),expand.grid(1:(ninds/2),1:npops))
colnames(tempcut)=c("pop","silscore","samplenum","popnum")
#get list of inds with silscores<0
cuts=sapply(1:npops, function(p) filter(tempcut,silscore<0 & popnum==p)$samplenum)
## Warning: package 'bindrcpp' was built under R version 3.3.2
Allele counts without silhouette cuts
allele_counts=list()
allele_freqs=list()
cum=0
rmpop=as.numeric()
for(i in 1:(npops)){
sample=1:(ninds/2)
if(length(sample)==0){
allele_counts[[i]]=paste(0,0,sep=",")
allele_freqs[[i]]=paste(0,0,sep=",")
rmpop=c(rmpop,i)
} else if(length(sample)==1) {
allele_counts[[i]]=paste(mydat[cum+sample,],2*length(sample)-mydat[cum+sample,],sep=",")
allele_freqs[[i]]=mydat[cum+sample,]/(2*length(sample))
}
else{
allele_counts[[i]]=paste(colSums(mydat[cum+sample,]),2*length(sample)-colSums(mydat[cum+sample,]),sep=",")
allele_freqs[[i]]=colSums(mydat[cum+sample,])/(2*length(sample))
}
cum=cum+max(sample)
}
treemix<-data.frame(matrix(unlist(allele_counts),ncol=npops,byrow=F))
#colnames(treemix)=paste("pop",rep(1:(npops)),sep="")
colnames(treemix)=c("PARV","MEX","ML","SL","SH","MH")
write.table(treemix,"treemix_input.txt",quote=F,row.name=F)
system("gzip -f treemix_input.txt")
Plot treemix
system("~/src/treemix/src/treemix -i ~/Desktop/sandbox/silhouette/treemix_input.txt.gz -o treemix_out -m 1")
plot_tree("~/Desktop/sandbox/silhouette/treemix_out")
## Warning in plot_tree("~/Desktop/sandbox/silhouette/treemix_out"): NAs
## introduced by coercion
## Warning in plot_tree("~/Desktop/sandbox/silhouette/treemix_out"): NAs
## introduced by coercion
## Warning in plot_tree("~/Desktop/sandbox/silhouette/treemix_out"): NAs
## introduced by coercion
## Warning in plot_tree("~/Desktop/sandbox/silhouette/treemix_out"): NAs
## introduced by coercion
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
## 1 0 <NA> NOT_ROOT MIG NOT_TIP 52 16 NA NA NA
## 2 1 SH NOT_ROOT NOT_MIG TIP 32 NA NA NA NA
## 3 2 <NA> NOT_ROOT NOT_MIG NOT_TIP 16 31 1 63 3
## 4 3 MH NOT_ROOT NOT_MIG TIP 63 NA NA NA NA
## 5 4 SL NOT_ROOT NOT_MIG TIP 32 NA NA NA NA
## 6 15 PARV NOT_ROOT NOT_MIG TIP 16 NA NA NA NA
## 7 16 <NA> NOT_ROOT NOT_MIG NOT_TIP 52 15 1 2 4
## 8 31 ML NOT_ROOT NOT_MIG TIP 2 NA NA NA NA
## 9 32 <NA> NOT_ROOT NOT_MIG NOT_TIP 63 4 1 1 1
## 10 52 <NA> ROOT NOT_MIG NOT_TIP 52 51 1 16 5
## 11 51 MEX NOT_ROOT NOT_MIG TIP 52 NA NA NA NA
## 12 63 <NA> NOT_ROOT NOT_MIG NOT_TIP 2 32 2 3 1
## V11
## 1 <NA>
## 2 SH:0.036528
## 3 (ML:0.00786689,((SL:0.00398018,SH:0.036528):0.0015856,MH:0.0177283):0.000123884):0.00226343
## 4 MH:0.0177283
## 5 SL:0.00398018
## 6 PARV:0.00902583
## 7 (PARV:0.00902583,(ML:0.00786689,((SL:0.00398018,SH:0.036528):0.0015856,MH:0.0177283):0.000123884):0.00226343):0.0150036
## 8 ML:0.00786689
## 9 (SL:0.00398018,SH:0.036528):0.0015856
## 10 (MEX:3.34934e-06,(PARV:0.00902583,(ML:0.00786689,((SL:0.00398018,SH:0.036528):0.0015856,MH:0.0177283):0.000123884):0.00226343):0.0150036);
## 11 MEX:3.34934e-06
## 12 ((SL:0.00398018,SH:0.036528):0.0015856,MH:0.0177283):0.000123884
## x y ymin ymax
## 1 3.349340e-06 NA 0.0000000 0.8333333
## 2 5.550456e-02 0.25000000 0.1666667 0.3333333
## 3 1.726708e-02 0.50000000 0.0000000 0.6666667
## 4 2.617906e-02 0.08333333 0.0000000 0.1666667
## 5 2.295674e-02 0.41666667 0.3333333 0.5000000
## 6 2.402948e-02 0.75000000 0.6666667 0.8333333
## 7 1.500365e-02 0.66666667 0.0000000 0.8333333
## 8 2.513397e-02 0.58333333 0.5000000 0.6666667
## 9 1.897656e-02 0.33333333 0.1666667 0.5000000
## 10 0.000000e+00 0.83333333 0.0000000 1.0000000
## 11 3.349340e-06 0.91666667 0.8333333 1.0000000
## 12 1.739096e-02 0.16666667 0.0000000 0.5000000
## [1] "0.000223236 0 0.01500364934"
## [1] 5.550456e-02 2.617906e-02 2.295674e-02 2.402948e-02 2.513397e-02
## [6] 3.349340e-06
## [1] 0.003
## [1] "mse 0.000360186638888889"
## $d
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
## 1 0 <NA> NOT_ROOT MIG NOT_TIP 52 16 NA NA NA
## 2 1 SH NOT_ROOT NOT_MIG TIP 32 NA NA NA NA
## 3 2 <NA> NOT_ROOT NOT_MIG NOT_TIP 16 31 1 63 3
## 4 3 MH NOT_ROOT NOT_MIG TIP 63 NA NA NA NA
## 5 4 SL NOT_ROOT NOT_MIG TIP 32 NA NA NA NA
## 6 15 PARV NOT_ROOT NOT_MIG TIP 16 NA NA NA NA
## 7 16 <NA> NOT_ROOT NOT_MIG NOT_TIP 52 15 1 2 4
## 8 31 ML NOT_ROOT NOT_MIG TIP 2 NA NA NA NA
## 9 32 <NA> NOT_ROOT NOT_MIG NOT_TIP 63 4 1 1 1
## 10 52 <NA> ROOT NOT_MIG NOT_TIP 52 51 1 16 5
## 11 51 MEX NOT_ROOT NOT_MIG TIP 52 NA NA NA NA
## 12 63 <NA> NOT_ROOT NOT_MIG NOT_TIP 2 32 2 3 1
## V11
## 1 <NA>
## 2 SH:0.036528
## 3 (ML:0.00786689,((SL:0.00398018,SH:0.036528):0.0015856,MH:0.0177283):0.000123884):0.00226343
## 4 MH:0.0177283
## 5 SL:0.00398018
## 6 PARV:0.00902583
## 7 (PARV:0.00902583,(ML:0.00786689,((SL:0.00398018,SH:0.036528):0.0015856,MH:0.0177283):0.000123884):0.00226343):0.0150036
## 8 ML:0.00786689
## 9 (SL:0.00398018,SH:0.036528):0.0015856
## 10 (MEX:3.34934e-06,(PARV:0.00902583,(ML:0.00786689,((SL:0.00398018,SH:0.036528):0.0015856,MH:0.0177283):0.000123884):0.00226343):0.0150036);
## 11 MEX:3.34934e-06
## 12 ((SL:0.00398018,SH:0.036528):0.0015856,MH:0.0177283):0.000123884
## x y ymin ymax
## 1 3.349355e-06 0.83329613 0.0000000 0.8333333
## 2 5.550456e-02 0.25000000 0.1666667 0.3333333
## 3 1.726708e-02 0.50000000 0.0000000 0.6666667
## 4 2.617906e-02 0.08333333 0.0000000 0.1666667
## 5 2.295674e-02 0.41666667 0.3333333 0.5000000
## 6 2.402948e-02 0.75000000 0.6666667 0.8333333
## 7 1.500365e-02 0.66666667 0.0000000 0.8333333
## 8 2.513397e-02 0.58333333 0.5000000 0.6666667
## 9 1.897656e-02 0.33333333 0.1666667 0.5000000
## 10 0.000000e+00 0.83333333 0.0000000 1.0000000
## 11 3.349340e-06 0.91666667 0.8333333 1.0000000
## 12 1.739096e-02 0.16666667 0.0000000 0.5000000
##
## $e
## V1 V2 V3 V4 V5 V6 V7
## 1 16 15 9.025830e-03 1.000000 NOT_MIG 0.000000000 1.00000
## 2 32 4 3.980180e-03 1.000000 NOT_MIG 0.000000000 1.00000
## 3 0 16 1.500030e-02 1.000000 NOT_MIG 0.000223236 1.00000
## 4 32 1 3.652800e-02 1.000000 NOT_MIG 0.000000000 1.00000
## 5 52 51 3.349340e-06 1.000000 NOT_MIG 0.000000000 1.00000
## 6 2 31 7.866890e-03 1.000000 NOT_MIG 0.000000000 1.00000
## 7 16 2 2.263430e-03 1.000000 NOT_MIG 0.000000000 1.00000
## 8 52 0 3.349340e-06 1.000000 NOT_MIG 0.000000000 1.00000
## 9 0 3 0.000000e+00 0.295933 MIG 0.000223236 0.00000
## 10 2 63 1.238840e-04 1.000000 NOT_MIG 0.000000000 1.00000
## 11 63 32 1.585600e-03 1.000000 NOT_MIG 0.000000000 1.00000
## 12 63 3 8.788102e-03 0.704067 NOT_MIG 0.000000000 0.49571
Allele counts with silhouette cuts
allele_counts=list()
allele_freqs=list()
cum=0
rmpop=as.numeric()
for(i in 1:(npops)){
sample=which(1:(ninds/2) %!in% cuts[[i]])
if(length(sample)==0){
allele_counts[[i]]=paste(0,0,sep=",")
allele_freqs[[i]]=paste(0,0,sep=",")
rmpop=c(rmpop,i)
} else if(length(sample)==1) {
allele_counts[[i]]=paste(mydat[cum+sample,],2*length(sample)-mydat[cum+sample,],sep=",")
allele_freqs[[i]]=mydat[cum+sample,]/(2*length(sample))
}
else{
allele_counts[[i]]=paste(colSums(mydat[cum+sample,]),2*length(sample)-colSums(mydat[cum+sample,]),sep=",")
allele_freqs[[i]]=colSums(mydat[cum+sample,])/(2*length(sample))
}
cum=cum+max(sample)
}
treemix<-data.frame(matrix(unlist(allele_counts),ncol=npops,byrow=F))
#colnames(treemix)=paste("pop",rep(1:(npops)),sep="")
colnames(treemix)=c("PARV","MEX","ML","SL","SH","MH")
write.table(treemix,"treemix_input.txt",quote=F,row.name=F)
system("gzip -f treemix_input.txt")
Plot treemix
system("~/src/treemix/src/treemix -i ~/Desktop/sandbox/silhouette/treemix_input.txt.gz -o treemix_out -m 1")
plot_tree("~/Desktop/sandbox/silhouette/treemix_out")
## Warning in plot_tree("~/Desktop/sandbox/silhouette/treemix_out"): NAs
## introduced by coercion
## Warning in plot_tree("~/Desktop/sandbox/silhouette/treemix_out"): NAs
## introduced by coercion
## Warning in plot_tree("~/Desktop/sandbox/silhouette/treemix_out"): NAs
## introduced by coercion
## Warning in plot_tree("~/Desktop/sandbox/silhouette/treemix_out"): NAs
## introduced by coercion
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
## 1 0 <NA> NOT_ROOT NOT_MIG NOT_TIP 16 15 1 32 2
## 2 1 MH NOT_ROOT NOT_MIG TIP 59 NA NA NA NA
## 3 2 <NA> NOT_ROOT MIG NOT_TIP 32 4 NA NA NA
## 4 3 SH NOT_ROOT NOT_MIG TIP 52 NA NA NA NA
## 5 4 MEX NOT_ROOT NOT_MIG TIP 32 NA NA NA NA
## 6 16 <NA> ROOT NOT_MIG NOT_TIP 16 0 3 59 3
## 7 15 ML NOT_ROOT NOT_MIG TIP 0 NA NA NA NA
## 8 31 PARV NOT_ROOT NOT_MIG TIP 32 NA NA NA NA
## 9 32 <NA> NOT_ROOT NOT_MIG NOT_TIP 0 31 1 4 1
## 10 51 SL NOT_ROOT NOT_MIG TIP 52 NA NA NA NA
## 11 52 <NA> NOT_ROOT NOT_MIG NOT_TIP 59 3 1 51 1
## 12 59 <NA> NOT_ROOT NOT_MIG NOT_TIP 16 52 2 1 1
## V11
## 1 (ML:0.00764021,(PARV:0.00949125,MEX:0.0150432):0.00216674):0.000256928
## 2 MH:0.0183787
## 3 <NA>
## 4 SH:0.0366425
## 5 MEX:0.0150432
## 6 ((ML:0.00764021,(PARV:0.00949125,MEX:0.0150432):0.00216674):0.000256928,((SH:0.0366425,SL:0.00386572):0.00137858,MH:0.0183787):0.000256928);
## 7 ML:0.00764021
## 8 PARV:0.00949125
## 9 (PARV:0.00949125,MEX:0.0150432):0.00216674
## 10 SL:0.00386572
## 11 (SH:0.0366425,SL:0.00386572):0.00137858
## 12 ((SH:0.0366425,SL:0.00386572):0.00137858,MH:0.0183787):0.000256928
## x y ymin ymax
## 1 0.000256928 0.83333333 0.5000000 1.0000000
## 2 0.009120663 0.08333333 0.0000000 0.1666667
## 3 0.017462268 NA 0.5000000 0.6666667
## 4 0.038278008 0.41666667 0.3333333 0.5000000
## 5 0.017466846 0.58333333 0.5000000 0.6666667
## 6 0.000000000 0.50000000 0.0000000 1.0000000
## 7 0.007897138 0.91666667 0.8333333 1.0000000
## 8 0.011914918 0.75000000 0.6666667 0.8333333
## 9 0.002423668 0.66666667 0.5000000 0.8333333
## 10 0.005501228 0.25000000 0.1666667 0.3333333
## 11 0.001635508 0.33333333 0.1666667 0.5000000
## 12 0.000256928 0.16666667 0.0000000 0.5000000
## [1] "0.999696 0.002423668 0.01746684585"
## [1] 0.009120663 0.038278008 0.017466846 0.007897138 0.011914918 0.005501228
## [1] 0.003
## [1] "mse 0.000368204944444444"
## $d
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
## 1 0 <NA> NOT_ROOT NOT_MIG NOT_TIP 16 15 1 32 2
## 2 1 MH NOT_ROOT NOT_MIG TIP 59 NA NA NA NA
## 3 2 <NA> NOT_ROOT MIG NOT_TIP 32 4 NA NA NA
## 4 3 SH NOT_ROOT NOT_MIG TIP 52 NA NA NA NA
## 5 4 MEX NOT_ROOT NOT_MIG TIP 32 NA NA NA NA
## 6 16 <NA> ROOT NOT_MIG NOT_TIP 16 0 3 59 3
## 7 15 ML NOT_ROOT NOT_MIG TIP 0 NA NA NA NA
## 8 31 PARV NOT_ROOT NOT_MIG TIP 32 NA NA NA NA
## 9 32 <NA> NOT_ROOT NOT_MIG NOT_TIP 0 31 1 4 1
## 10 51 SL NOT_ROOT NOT_MIG TIP 52 NA NA NA NA
## 11 52 <NA> NOT_ROOT NOT_MIG NOT_TIP 59 3 1 51 1
## 12 59 <NA> NOT_ROOT NOT_MIG NOT_TIP 16 52 2 1 1
## V11
## 1 (ML:0.00764021,(PARV:0.00949125,MEX:0.0150432):0.00216674):0.000256928
## 2 MH:0.0183787
## 3 <NA>
## 4 SH:0.0366425
## 5 MEX:0.0150432
## 6 ((ML:0.00764021,(PARV:0.00949125,MEX:0.0150432):0.00216674):0.000256928,((SH:0.0366425,SL:0.00386572):0.00137858,MH:0.0183787):0.000256928);
## 7 ML:0.00764021
## 8 PARV:0.00949125
## 9 (PARV:0.00949125,MEX:0.0150432):0.00216674
## 10 SL:0.00386572
## 11 (SH:0.0366425,SL:0.00386572):0.00137858
## 12 ((SH:0.0366425,SL:0.00386572):0.00137858,MH:0.0183787):0.000256928
## x y ymin ymax
## 1 0.000256928 0.83333333 0.5000000 1.0000000
## 2 0.009120663 0.08333333 0.0000000 0.1666667
## 3 0.017462273 0.58335867 0.5000000 0.6666667
## 4 0.038278008 0.41666667 0.3333333 0.5000000
## 5 0.017466846 0.58333333 0.5000000 0.6666667
## 6 0.000000000 0.50000000 0.0000000 1.0000000
## 7 0.007897138 0.91666667 0.8333333 1.0000000
## 8 0.011914918 0.75000000 0.6666667 0.8333333
## 9 0.002423668 0.66666667 0.5000000 0.8333333
## 10 0.005501228 0.25000000 0.1666667 0.3333333
## 11 0.001635508 0.33333333 0.1666667 0.5000000
## 12 0.000256928 0.16666667 0.0000000 0.5000000
##
## $e
## V1 V2 V3 V4 V5 V6 V7
## 1 0 15 7.640210e-03 1.000000 NOT_MIG 0.000000 1.000000
## 2 16 0 2.569280e-04 1.000000 NOT_MIG 0.000000 1.000000
## 3 52 3 3.664250e-02 1.000000 NOT_MIG 0.000000 1.000000
## 4 2 4 4.577850e-06 1.000000 NOT_MIG 0.999696 1.000000
## 5 32 31 9.491250e-03 1.000000 NOT_MIG 0.000000 1.000000
## 6 0 32 2.166740e-03 1.000000 NOT_MIG 0.000000 1.000000
## 7 2 1 0.000000e+00 0.305534 MIG 0.999696 0.000000
## 8 52 51 3.865720e-03 1.000000 NOT_MIG 0.000000 1.000000
## 9 32 2 1.503860e-02 1.000000 NOT_MIG 0.000000 1.000000
## 10 16 59 2.569280e-04 1.000000 NOT_MIG 0.000000 1.000000
## 11 59 52 1.378580e-03 1.000000 NOT_MIG 0.000000 1.000000
## 12 59 1 8.863735e-03 0.694466 NOT_MIG 0.000000 0.482283
#blah