library(tidyverse)
library(cowplot)
library(admixturegraph)
library(neldermead)
library(doParallel)
library(parallel)
library(foreach)
library(pbapply)
library(cluster)
library(viridis)
source("plotting_funcs.R")
library(LEA)
gendata<-function(migration_command,n,ninds){
hapdat<-matrix(unlist(lapply(strsplit(system(paste("mspms ", n*ninds, " 1 -t 4500 -r 4500 4500 -I ", n, " ", paste(rep(ninds,n),collapse=" ") , " ", migration_command, " | tail -", n*ninds,sep=""),intern=T),''),as.numeric)),nrow=n*ninds,byrow=T) # run coalescent sim
#2 GHOST POP
#hapdat<-matrix(unlist(lapply(strsplit(system(paste("mspms ", n*ninds, " 1 -t 450 -r 450 450 -I ", n+2, " ", paste(c(rep(ninds,n),rep(0,2)),collapse=" ") , " ", migration_command, " | tail -", n*ninds,sep=""),intern=T),''),as.numeric)),nrow=n*ninds,byrow=T) # run coalescent sim
paste(c(rep(ninds,n),rep(0,2)))
genodat<-hapdat[seq(1,n*ninds,2),]+hapdat[seq(2,n*ninds,2),] # make diploid
write.table(t(genodat),file="~/crap.geno", quote=F, row.names=F, col.names=F, sep="") #write genotype file
return(genodat)
}
'%!in%' <- function(x,y)!('%in%'(x,y)) # "not a member of"" function
poly<-function(x){ if(x>0 && x<1){ return(TRUE)} else{return(FALSE)}} # find polymorphic loci
f4_calc<-function(perms,list_of_freqs,locus){
freqs<-sapply(perms,function(z) list_of_freqs[[z]][locus])
f4_freqs=freqs[perm]
return((f4_freqs[1]-f4_freqs[2])*(f4_freqs[3]-f4_freqs[4]))
} #calculate f4 stats (not used)
npops=6
ninds=20
kpops=6
#model w/ admixture but no migration
#migration_command=paste("-n 4 0.8 -n 5 0.1 -n 6 0.8 -g 3 5.0 -ej 0.04 5 4 -ej 0.06 4 3 -es 0.07 6 0.8 -ej 0.07 6 3 -ej 0.07 7 2 -ej 0.1 3 1 -ej 0.6 2 1")
#model with admixture and migration
#migration_command=paste("-n 4 0.8 -n 5 0.1 -n 6 0.8 -m 5 4 .5 -m 4 5 .5 -m 4 3 .5 -m 3 4 .5 -m 2 6 .1 -g 3 5.0 -ej 0.04 5 4 -ej 0.06 4 3 -es 0.07 6 0.8 -ej 0.07 6 3 -ej 0.07 7 2 -ej 0.1 3 1 -ej 0.6 2 1")
#model migration with no pop split
migration_command=paste("-n 4 0.8 -n 5 0.1 -n 6 0.8 -m 6 2 10 -g 3 200 -ej 0.0075 5 4 -ej 0.01 4 3 -ej 0.011 6 3 -ej 0.02 3 1 -ej 0.12 2 1")
#nothing
#migration_command=paste("-n 4 0.8 -n 5 0.1 -n 6 0.8 -g 3 5.0 -ej 0.04 5 4 -ej 0.06 4 3 -ej 0.07 6 3 -ej 0.1 3 1 -ej 0.6 2 1")
mydat<-gendata(migration_command,npops,ninds)
Set Tree
leaves <- c("SH", "SL", "MH", "ML","PARV","MEX")
inner_nodes <- c("S", "M", "SM","PM","PX")
edges <- parent_edges(c(edge("SH", "S"),
edge("SL", "S"),
edge("ML", "M"),
# admixture_edge("oMH", "MM","M", "α"),
edge("MH", "M"),
edge("M","SM"),
# edge("MH","oMH"),
edge("S","SM"),
edge("PARV","PM"),
edge("SM","PM"),
edge("PM","PX"),
edge("MEX","PX")
))
graph <- agraph(leaves, inner_nodes, edges)
make_an_outgroup(graph, "MEx")
plot(graph, show_inner_node_labels = TRUE, show_admixture_labels = TRUE)
Make list of trees
graph_list <- add_an_admixture(graph, "MIX")
Make distance matrix
nloci=dim(mydat)[2]
ibs_mat<-as.matrix(dist(mydat,method="manhattan",diag=TRUE,upper=TRUE)/nloci)
pops=c("PARV","MEX","ML","SL","SH","MH")
for_sil=data.frame(paste(1:(npops*ninds/2),"@",c(sapply(pops,function(j) rep(j,ninds/2))),sep=""),ibs_mat)
write.table(for_sil,file="sim_dist_matrix.txt",col.names=FALSE,row.names=FALSE,quote=FALSE,sep="\t")
Calculate silhoutte scores, make graphs
system("perl silhouette.pl -i sim_dist_matrix.txt -o temp -g")
Get individuals cut due to silhouette scores
#read in silscores
tempcut<-data.frame(read.table("temp_ind.sil",header=F),expand.grid(1:(ninds/2),1:npops))
colnames(tempcut)=c("pop","silscore","samplenum","popnum")
#get list of inds with silscores<0
cuts=sapply(1:npops, function(p) filter(tempcut,silscore<0 & popnum==p)$samplenum)
## Warning: package 'bindrcpp' was built under R version 3.3.2
#cut based on silhouette scores
keep<-sort(which(silhouette(kmeans(dist(mydat),npops)[[1]],dist(mydat))[,3]>0))
left_out<-which(!(1:(npops*ninds/2) %in% keep))
#####TESTING ONLY PLEASE DELETE
#####TESTING ONLY PLEASE DELETE
#left_out<-c(51:58)
#####TESTING ONLY PLEASE DELETE
#####TESTING ONLY PLEASE DELETE
cut_out<-paste(left_out,tempcut$pop[left_out])
if(length(left_out)>0){
print(paste("The following",length(left_out),"individuals were removed:"))
print(cut_out)
} else {
print("Nobody cut.")
}
## [1] "The following 7 individuals were removed:"
## [1] "51 SL" "52 SL" "53 SL" "56 SL" "57 SL" "59 SL" "60 SL"
#make new data from cut
cutdat<-mydat[keep,]
write.table(t(cutdat),file="~/crapcut.geno",quote=F,row.names=F,col.names=F,sep="")
Allele counts without silhouette cuts
allele_counts=list()
allele_freqs=list()
cumulative=0
rmpop=as.numeric()
for(i in 1:(npops)){
sample=1:(ninds/2)
if(length(sample)==0){
allele_counts[[i]]=paste(0,0,sep=",")
allele_freqs[[i]]=paste(0,0,sep=",")
rmpop=c(rmpop,i)
} else if(length(sample)==1) {
allele_counts[[i]]=paste(mydat[cumulative+sample,],2*length(sample)-mydat[cumulative+sample,],sep=",")
allele_freqs[[i]]=mydat[cumulative+sample,]/(2*length(sample))
}
else{
allele_counts[[i]]=paste(colSums(mydat[cumulative+sample,]),2*length(sample)-colSums(mydat[cumulative+sample,]),sep=",")
allele_freqs[[i]]=colSums(mydat[cumulative+sample,])/(2*length(sample))
}
cumulative=cumulative+max(sample)
}
Allele counts with silhouette cuts
cut_allele_counts=list()
cut_allele_freqs=list()
cum=0
rmpop=as.numeric()
if(length(left_out)==0){
cut_allele_counts=allele_counts
cut_allele_freqs=allele_freqs
} else{
for(i in 1:(npops)){
sample=which(1:(ninds/2) %!in% cuts[[i]])
if(length(sample)==0){
cut_allele_counts[[i]]=paste(0,0,sep=",")
cut_allele_freqs[[i]]=paste(0,0,sep=",")
rmpop=c(rmpop,i)
} else if(length(sample)==1) {
cut_allele_counts[[i]]=paste(mydat[cum+sample,],2*length(sample)-mydat[cum+sample,],sep=",")
cut_allele_freqs[[i]]=mydat[cum+sample,]/(2*length(sample))
}
else{
cut_allele_counts[[i]]=paste(colSums(mydat[cum+sample,]),2*length(sample)-colSums(mydat[cum+sample,]),sep=",")
cut_allele_freqs[[i]]=colSums(mydat[cum+sample,])/(2*length(sample))
}
cum=cum+max(sample)
}
}
#test:
#sapply(1:6, function(p) sum(allele_counts[[p]]!=cut_allele_counts[[p]]))
#structure on original
obj.snmf<-snmf("~/crap.geno",K = kpops, alpha = 10, project = "new")
## The project is saved into :
##
##
## To load the project, use:
## project = load.snmfProject("")
##
## To remove the project, use:
## remove.snmfProject("")
##
## [1] "*************************************"
## [1] "* sNMF K = 6 repetition 1 *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 60
## -L (number of loci) 30002
## -K (number of ancestral pops) 6
## -x (input file) /Users/jri/crap.geno
## -q (individual admixture file) /Users/jri/crap.snmf/K6/run1/crap_r1.6.Q
## -g (ancestral frequencies file) /Users/jri/crap.snmf/K6/run1/crap_r1.6.G
## -i (number max of iterations) 200
## -a (regularization parameter) 10
## -s (seed random init) 4627730093030073727
## -e (tolerance error) 1E-05
## -p (number of processes) 1
## - diploid
##
## Read genotype file /Users/jri/crap.geno: OK.
##
##
## Main algorithm:
## [ ]
## [=======================]
## Number of iterations: 62
##
## Least-square error: 265607.477285
## Write individual ancestry coefficient file /Users/jri/crap.snmf/K6/run1/crap_r1.6.Q: OK.
## Write ancestral allele frequency coefficient file /Users/jri/crap.snmf/K6/run1/crap_r1.6.G: OK.
##
## The project is saved into :
##
##
## To load the project, use:
## project = load.snmfProject("")
##
## To remove the project, use:
## remove.snmfProject("")
qmatrix = Q(obj.snmf, K = kpops)
bobuncut=gather(data.frame(t(qmatrix)),dude,qval,1:(nrow(qmatrix))) %>%
mutate(pop=factor(rep(1:kpops,nrow(qmatrix))),dude=factor(as.numeric(gsub("X","",dude))))
blank2=c(rep("",8))
blank1=c(rep("",1))
poplabs=c(blank1,"PA",blank2,blank1,"ME",blank2,blank1,"ML",blank2,blank1,"SL",blank2,blank1,"SH",blank2,blank1,"MH",blank2)
uncut<-ggplot(bobuncut,aes(x=dude,y=qval,fill=pop))+
geom_bar(stat="identity")+
theme(
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank()
)+
ggtitle("ADMIXTURE")+
scale_fill_manual(guide=FALSE,values=viridis(npops))+
scale_x_discrete(labels=poplabs)+
theme(axis.text=element_text(size=32),plot.title = element_text(size = 40, face = "bold"))
cut<-ggplot(bobcut,aes(x=dude,y=qval,fill=pop))+
geom_bar(stat="identity")+
ggtitle("silhouette cuts")+
theme(axis.text=element_text(size=32),plot.title = element_text(size = 40, face = "bold"))+
theme(axis.text.y=element_blank(), axis.ticks.y=element_blank(),
axis.title.x=element_blank(), axis.title.y=element_blank())+
scale_fill_manual(guide=FALSE,values=viridis(npops)) + scale_x_discrete(labels=poplabs)
bob=list(cut,uncut)
graphlist <- replicate(10, qplot(1,1), simplify = FALSE)
plots<-plot_grid(uncut,cut)
print( plot_grid(plots,ncol=1, rel_heights=c(0.3, 1)))
treemix<-data.frame(matrix(unlist(allele_counts),ncol=npops,byrow=F))
colnames(treemix)=c("PARV","MEX","ML","SL","SH","MH")
write.table(treemix,"treemix_input.txt",quote=F,row.name=F)
system("gzip -f treemix_input.txt")
Make tree
system("~/src/treemix/src/treemix -i ~/gdrive/sandbox/silhouette/treemix_input.txt.gz -o treemix_out -m 1 -root MEX")
plot_tree("~/gdrive/sandbox/silhouette/treemix_out")
## Warning in plot_tree("~/gdrive/sandbox/silhouette/treemix_out"): NAs
## introduced by coercion
## Warning in plot_tree("~/gdrive/sandbox/silhouette/treemix_out"): NAs
## introduced by coercion
## Warning in plot_tree("~/gdrive/sandbox/silhouette/treemix_out"): NAs
## introduced by coercion
## Warning in plot_tree("~/gdrive/sandbox/silhouette/treemix_out"): NAs
## introduced by coercion
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
## 1 1 MEX NOT_ROOT NOT_MIG TIP 53 NA NA NA NA
## 2 2 <NA> NOT_ROOT NOT_MIG NOT_TIP 32 52 2 31 1
## 3 3 SL NOT_ROOT NOT_MIG TIP 52 NA NA NA NA
## 4 4 ML NOT_ROOT NOT_MIG TIP 32 NA NA NA NA
## 5 15 PARV NOT_ROOT NOT_MIG TIP 16 NA NA NA NA
## 6 16 <NA> NOT_ROOT NOT_MIG NOT_TIP 53 15 1 32 4
## 7 31 MH NOT_ROOT NOT_MIG TIP 2 NA NA NA NA
## 8 32 <NA> NOT_ROOT NOT_MIG NOT_TIP 16 4 1 2 3
## 9 51 SH NOT_ROOT NOT_MIG TIP 52 NA NA NA NA
## 10 52 <NA> NOT_ROOT NOT_MIG NOT_TIP 2 3 1 51 1
## 11 53 <NA> ROOT NOT_MIG NOT_TIP 53 16 5 1 1
## 12 54 <NA> NOT_ROOT MIG NOT_TIP 53 1 NA NA NA
## V11
## 1 MEX:0.0154882
## 2 ((SL:0.00113466,SH:0.00854118):0.00104406,MH:0.00361164):0.000173257
## 3 SL:0.00113466
## 4 ML:0.00378894
## 5 PARV:0.00379582
## 6 (PARV:0.00379582,(ML:0.00378894,((SL:0.00113466,SH:0.00854118):0.00104406,MH:0.00361164):0.000173257):0.0271239):0.0154882
## 7 MH:0.00361164
## 8 (ML:0.00378894,((SL:0.00113466,SH:0.00854118):0.00104406,MH:0.00361164):0.000173257):0.0271239
## 9 SH:0.00854118
## 10 (SL:0.00113466,SH:0.00854118):0.00104406
## 11 ((PARV:0.00379582,(ML:0.00378894,((SL:0.00113466,SH:0.00854118):0.00104406,MH:0.00361164):0.000173257):0.0271239):0.0154882,MEX:0.0154882);
## 12 <NA>
## x y ymin ymax
## 1 0.01548820 0.08333333 0.0000000 0.1666667
## 2 0.04278536 0.33333333 0.1666667 0.6666667
## 3 0.04496408 0.58333333 0.5000000 0.6666667
## 4 0.04640104 0.75000000 0.6666667 0.8333333
## 5 0.01928402 0.91666667 0.8333333 1.0000000
## 6 0.01548820 0.83333333 0.1666667 1.0000000
## 7 0.04537879 0.25000000 0.1666667 0.3333333
## 8 0.04261210 0.66666667 0.1666667 0.8333333
## 9 0.05237060 0.41666667 0.3333333 0.5000000
## 10 0.04382942 0.50000000 0.3333333 0.6666667
## 11 0.00000000 0.16666667 0.0000000 1.0000000
## 12 0.01548820 NA 0.0000000 0.1666667
## [1] "1 0 0.0154882"
## [1] 0.01548820 0.04496408 0.04640104 0.01928402 0.04537879 0.05237060
## [1] 0.003
## [1] "mse 0.0001483573"
## $d
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
## 1 1 MEX NOT_ROOT NOT_MIG TIP 53 NA NA NA NA
## 2 2 <NA> NOT_ROOT NOT_MIG NOT_TIP 32 52 2 31 1
## 3 3 SL NOT_ROOT NOT_MIG TIP 52 NA NA NA NA
## 4 4 ML NOT_ROOT NOT_MIG TIP 32 NA NA NA NA
## 5 15 PARV NOT_ROOT NOT_MIG TIP 16 NA NA NA NA
## 6 16 <NA> NOT_ROOT NOT_MIG NOT_TIP 53 15 1 32 4
## 7 31 MH NOT_ROOT NOT_MIG TIP 2 NA NA NA NA
## 8 32 <NA> NOT_ROOT NOT_MIG NOT_TIP 16 4 1 2 3
## 9 51 SH NOT_ROOT NOT_MIG TIP 52 NA NA NA NA
## 10 52 <NA> NOT_ROOT NOT_MIG NOT_TIP 2 3 1 51 1
## 11 53 <NA> ROOT NOT_MIG NOT_TIP 53 16 5 1 1
## 12 54 <NA> NOT_ROOT MIG NOT_TIP 53 1 NA NA NA
## V11
## 1 MEX:0.0154882
## 2 ((SL:0.00113466,SH:0.00854118):0.00104406,MH:0.00361164):0.000173257
## 3 SL:0.00113466
## 4 ML:0.00378894
## 5 PARV:0.00379582
## 6 (PARV:0.00379582,(ML:0.00378894,((SL:0.00113466,SH:0.00854118):0.00104406,MH:0.00361164):0.000173257):0.0271239):0.0154882
## 7 MH:0.00361164
## 8 (ML:0.00378894,((SL:0.00113466,SH:0.00854118):0.00104406,MH:0.00361164):0.000173257):0.0271239
## 9 SH:0.00854118
## 10 (SL:0.00113466,SH:0.00854118):0.00104406
## 11 ((PARV:0.00379582,(ML:0.00378894,((SL:0.00113466,SH:0.00854118):0.00104406,MH:0.00361164):0.000173257):0.0271239):0.0154882,MEX:0.0154882);
## 12 <NA>
## x y ymin ymax
## 1 0.01548820 0.08333333 0.0000000 0.1666667
## 2 0.04278536 0.33333333 0.1666667 0.6666667
## 3 0.04496408 0.58333333 0.5000000 0.6666667
## 4 0.04640104 0.75000000 0.6666667 0.8333333
## 5 0.01928402 0.91666667 0.8333333 1.0000000
## 6 0.01548820 0.83333333 0.1666667 1.0000000
## 7 0.04537879 0.25000000 0.1666667 0.3333333
## 8 0.04261210 0.66666667 0.1666667 0.8333333
## 9 0.05237060 0.41666667 0.3333333 0.5000000
## 10 0.04382942 0.50000000 0.3333333 0.6666667
## 11 0.00000000 0.16666667 0.0000000 1.0000000
## 12 0.01548820 0.08333333 0.0000000 0.1666667
##
## $e
## V1 V2 V3 V4 V5 V6 V7
## 1 16 15 0.003795820 1.000000 NOT_MIG 0 1.000000
## 2 2 52 0.001044060 1.000000 NOT_MIG 0 1.000000
## 3 52 3 0.001134660 1.000000 NOT_MIG 0 1.000000
## 4 52 51 0.008541180 1.000000 NOT_MIG 0 1.000000
## 5 16 32 0.027123900 1.000000 NOT_MIG 0 1.000000
## 6 53 16 0.015488200 1.000000 NOT_MIG 0 1.000000
## 7 53 54 0.015488200 1.000000 NOT_MIG 0 1.000000
## 8 54 1 0.000000000 1.000000 NOT_MIG 1 1.000000
## 9 54 31 0.000000000 0.152606 MIG 1 0.000000
## 10 32 4 0.003788940 1.000000 NOT_MIG 0 1.000000
## 11 32 2 0.000173257 1.000000 NOT_MIG 0 1.000000
## 12 2 31 0.002593434 0.847394 NOT_MIG 0 0.718077
treemix<-data.frame(matrix(unlist(cut_allele_counts),ncol=npops,byrow=F))
colnames(treemix)=c("PARV","MEX","ML","SL","SH","MH")
write.table(treemix,"treemix_cut_input.txt",quote=F,row.name=F)
system("gzip -f treemix_cut_input.txt")
Make tree
system("~/src/treemix/src/treemix -i ~/gdrive/sandbox/silhouette/treemix_cut_input.txt.gz -o treemix_cut_out -m 1 -root MEX")
plot_tree("~/gdrive/sandbox/silhouette/treemix_cut_out")
## Warning in plot_tree("~/gdrive/sandbox/silhouette/treemix_cut_out"): NAs
## introduced by coercion
## Warning in plot_tree("~/gdrive/sandbox/silhouette/treemix_cut_out"): NAs
## introduced by coercion
## Warning in plot_tree("~/gdrive/sandbox/silhouette/treemix_cut_out"): NAs
## introduced by coercion
## Warning in plot_tree("~/gdrive/sandbox/silhouette/treemix_cut_out"): NAs
## introduced by coercion
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
## 1 0 <NA> NOT_ROOT NOT_MIG NOT_TIP 2 15 1 32 3
## 2 1 PARV NOT_ROOT NOT_MIG TIP 2 NA NA NA NA
## 3 2 <NA> NOT_ROOT NOT_MIG NOT_TIP 53 1 1 0 4
## 4 3 MEX NOT_ROOT NOT_MIG TIP 53 NA NA NA NA
## 5 4 MH NOT_ROOT NOT_MIG TIP 32 NA NA NA NA
## 6 15 ML NOT_ROOT NOT_MIG TIP 0 NA NA NA NA
## 7 31 SL NOT_ROOT NOT_MIG TIP 52 NA NA NA NA
## 8 32 <NA> NOT_ROOT NOT_MIG NOT_TIP 0 52 2 4 1
## 9 51 SH NOT_ROOT NOT_MIG TIP 52 NA NA NA NA
## 10 52 <NA> NOT_ROOT NOT_MIG NOT_TIP 32 31 1 51 1
## 11 53 <NA> ROOT NOT_MIG NOT_TIP 53 2 5 3 1
## 12 54 <NA> NOT_ROOT MIG NOT_TIP 53 3 NA NA NA
## V11
## 1 (ML:0.00389898,((SL:0.00115514,SH:0.00852224):0.000940277,MH:0.003726):0.000163241):0.0265404
## 2 PARV:0.00456124
## 3 (PARV:0.00456124,(ML:0.00389898,((SL:0.00115514,SH:0.00852224):0.000940277,MH:0.003726):0.000163241):0.0265404):0.0136373
## 4 MEX:0.0136373
## 5 MH:0.003726
## 6 ML:0.00389898
## 7 SL:0.00115514
## 8 ((SL:0.00115514,SH:0.00852224):0.000940277,MH:0.003726):0.000163241
## 9 SH:0.00852224
## 10 (SL:0.00115514,SH:0.00852224):0.000940277
## 11 ((PARV:0.00456124,(ML:0.00389898,((SL:0.00115514,SH:0.00852224):0.000940277,MH:0.003726):0.000163241):0.0265404):0.0136373,MEX:0.0136373);
## 12 <NA>
## x y ymin ymax
## 1 0.04017770 0.66666667 0.1666667 0.8333333
## 2 0.01819854 0.91666667 0.8333333 1.0000000
## 3 0.01363730 0.83333333 0.1666667 1.0000000
## 4 0.01363730 0.08333333 0.0000000 0.1666667
## 5 0.04291548 0.25000000 0.1666667 0.3333333
## 6 0.04407668 0.75000000 0.6666667 0.8333333
## 7 0.04243636 0.58333333 0.5000000 0.6666667
## 8 0.04034094 0.33333333 0.1666667 0.6666667
## 9 0.04980346 0.41666667 0.3333333 0.5000000
## 10 0.04128122 0.50000000 0.3333333 0.6666667
## 11 0.00000000 0.16666667 0.0000000 1.0000000
## 12 0.01363730 NA 0.0000000 0.1666667
## [1] "1 0 0.0136373"
## [1] 0.01819854 0.01363730 0.04291548 0.04407668 0.04243636 0.04980346
## [1] 0.003
## [1] "mse 0.000173432172222222"
## $d
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
## 1 0 <NA> NOT_ROOT NOT_MIG NOT_TIP 2 15 1 32 3
## 2 1 PARV NOT_ROOT NOT_MIG TIP 2 NA NA NA NA
## 3 2 <NA> NOT_ROOT NOT_MIG NOT_TIP 53 1 1 0 4
## 4 3 MEX NOT_ROOT NOT_MIG TIP 53 NA NA NA NA
## 5 4 MH NOT_ROOT NOT_MIG TIP 32 NA NA NA NA
## 6 15 ML NOT_ROOT NOT_MIG TIP 0 NA NA NA NA
## 7 31 SL NOT_ROOT NOT_MIG TIP 52 NA NA NA NA
## 8 32 <NA> NOT_ROOT NOT_MIG NOT_TIP 0 52 2 4 1
## 9 51 SH NOT_ROOT NOT_MIG TIP 52 NA NA NA NA
## 10 52 <NA> NOT_ROOT NOT_MIG NOT_TIP 32 31 1 51 1
## 11 53 <NA> ROOT NOT_MIG NOT_TIP 53 2 5 3 1
## 12 54 <NA> NOT_ROOT MIG NOT_TIP 53 3 NA NA NA
## V11
## 1 (ML:0.00389898,((SL:0.00115514,SH:0.00852224):0.000940277,MH:0.003726):0.000163241):0.0265404
## 2 PARV:0.00456124
## 3 (PARV:0.00456124,(ML:0.00389898,((SL:0.00115514,SH:0.00852224):0.000940277,MH:0.003726):0.000163241):0.0265404):0.0136373
## 4 MEX:0.0136373
## 5 MH:0.003726
## 6 ML:0.00389898
## 7 SL:0.00115514
## 8 ((SL:0.00115514,SH:0.00852224):0.000940277,MH:0.003726):0.000163241
## 9 SH:0.00852224
## 10 (SL:0.00115514,SH:0.00852224):0.000940277
## 11 ((PARV:0.00456124,(ML:0.00389898,((SL:0.00115514,SH:0.00852224):0.000940277,MH:0.003726):0.000163241):0.0265404):0.0136373,MEX:0.0136373);
## 12 <NA>
## x y ymin ymax
## 1 0.04017770 0.66666667 0.1666667 0.8333333
## 2 0.01819854 0.91666667 0.8333333 1.0000000
## 3 0.01363730 0.83333333 0.1666667 1.0000000
## 4 0.01363730 0.08333333 0.0000000 0.1666667
## 5 0.04291548 0.25000000 0.1666667 0.3333333
## 6 0.04407668 0.75000000 0.6666667 0.8333333
## 7 0.04243636 0.58333333 0.5000000 0.6666667
## 8 0.04034094 0.33333333 0.1666667 0.6666667
## 9 0.04980346 0.41666667 0.3333333 0.5000000
## 10 0.04128122 0.50000000 0.3333333 0.6666667
## 11 0.00000000 0.16666667 0.0000000 1.0000000
## 12 0.01363730 0.08333333 0.0000000 0.1666667
##
## $e
## V1 V2 V3 V4 V5 V6 V7
## 1 2 1 0.004561240 1.000000 NOT_MIG 0 1.000000
## 2 32 52 0.000940277 1.000000 NOT_MIG 0 1.000000
## 3 52 31 0.001155140 1.000000 NOT_MIG 0 1.000000
## 4 52 51 0.008522240 1.000000 NOT_MIG 0 1.000000
## 5 2 0 0.026540400 1.000000 NOT_MIG 0 1.000000
## 6 53 2 0.013637300 1.000000 NOT_MIG 0 1.000000
## 7 53 54 0.013637300 1.000000 NOT_MIG 0 1.000000
## 8 54 3 0.000000000 1.000000 NOT_MIG 1 1.000000
## 9 54 4 0.000000000 0.168756 MIG 1 0.000000
## 10 0 15 0.003898980 1.000000 NOT_MIG 0 1.000000
## 11 0 32 0.000163241 1.000000 NOT_MIG 0 1.000000
## 12 32 4 0.002574542 0.831244 NOT_MIG 0 0.690967
Get Data
system("~/src/treemix/src/fourpop -i ~/gdrive/sandbox/silhouette/treemix_input.txt.gz -k 100 | grep -Ev 'Estimating|total|npop' | sed -e 's/,/ /g;s/;/ /' > ./for_admix.txt")
OG<-read.table("./for_admix.txt",header=F)
colnames(OG)=c("W","X","Y","Z","D","seD","Z.value")
OG <- OG[,c(1:5,7)]
plot(f4stats(OG))
Silhouette cuts
system("~/src/treemix/src/fourpop -i ~/gdrive/sandbox/silhouette/treemix_cut_input.txt.gz -k 100 | grep -Ev 'Estimating|total|npop' | sed -e 's/,/ /g;s/;/ /' > ./for_admix_cut.txt")
cutdat<-read.table("./for_admix_cut.txt",header=F)
colnames(cutdat)=c("W","X","Y","Z","D","seD","Z.value")
cutdat <- cutdat[,c(1:5,7)]
plot(f4stats(cutdat))
Fit graphs, plot graph with lowest error. First on OG data:
ncores=3 #cpu to use
fits=fit_graph_list(OG,graph_list,ncores)
fits_errors=pbsapply(1:length(fits), function(i) (fits[[i]]$best_error))
plot(fits[[which(fits_errors==min(fits_errors))]]$graph)
plot(fit_graph(OG,fits[[which(fits_errors==min(fits_errors))]]$graph))
plot(fit_graph(OG,graph))
Then on silhouette scores:
ncores=3 #cpu to use
fits=fit_graph_list(cutdat,graph_list,ncores)
fits_errors=pbsapply(1:length(fits), function(i) (fits[[i]]$best_error))
plot(fits[[which(fits_errors==min(fits_errors))]]$graph)
plot(fit_graph(cutdat,fits[[which(fits_errors==min(fits_errors))]]$graph))
plot(fit_graph(cutdat,graph))