Setup

Functions and libraries

library(tidyverse)
library(cowplot)
library(admixturegraph)
library(neldermead)
library(doParallel)
library(parallel)
library(foreach)
library(pbapply)
library(cluster)
library(viridis)
source("plotting_funcs.R")
library(LEA)

Functions

gendata<-function(migration_command,n,ninds){
  hapdat<-matrix(unlist(lapply(strsplit(system(paste("mspms ", n*ninds, " 1 -t 4500 -r 4500 4500 -I ", n, " ", paste(rep(ninds,n),collapse=" ") , " ", migration_command, " | tail -", n*ninds,sep=""),intern=T),''),as.numeric)),nrow=n*ninds,byrow=T) # run coalescent sim

#2 GHOST POP
#hapdat<-matrix(unlist(lapply(strsplit(system(paste("mspms ", n*ninds, " 1 -t 450 -r 450 450 -I ", n+2, " ", paste(c(rep(ninds,n),rep(0,2)),collapse=" ") , " ", migration_command, " | tail -", n*ninds,sep=""),intern=T),''),as.numeric)),nrow=n*ninds,byrow=T) # run coalescent sim

paste(c(rep(ninds,n),rep(0,2)))
  
  genodat<-hapdat[seq(1,n*ninds,2),]+hapdat[seq(2,n*ninds,2),] # make diploid 
  
  write.table(t(genodat),file="~/crap.geno", quote=F, row.names=F, col.names=F, sep="") #write genotype file
  return(genodat)
}

'%!in%' <- function(x,y)!('%in%'(x,y)) # "not a member of"" function

poly<-function(x){ if(x>0 && x<1){ return(TRUE)} else{return(FALSE)}} # find polymorphic loci

f4_calc<-function(perms,list_of_freqs,locus){
  freqs<-sapply(perms,function(z) list_of_freqs[[z]][locus])
  f4_freqs=freqs[perm]
  return((f4_freqs[1]-f4_freqs[2])*(f4_freqs[3]-f4_freqs[4]))
} #calculate f4 stats (not used)

Simulation

Coalescent Simulation

npops=6
ninds=20
kpops=6

#model w/ admixture but no migration
#migration_command=paste("-n 4 0.8 -n 5 0.1 -n 6 0.8 -g 3 5.0 -ej 0.04 5 4 -ej 0.06 4 3 -es 0.07 6 0.8 -ej 0.07 6 3 -ej 0.07 7 2 -ej 0.1 3 1 -ej 0.6 2 1")

#model with admixture and migration 
#migration_command=paste("-n 4 0.8 -n 5 0.1 -n 6 0.8 -m 5 4 .5 -m 4 5 .5 -m 4 3 .5 -m 3 4 .5 -m 2 6 .1 -g 3 5.0 -ej 0.04 5 4 -ej 0.06 4 3 -es 0.07 6 0.8 -ej 0.07 6 3 -ej 0.07 7 2 -ej 0.1 3 1 -ej 0.6 2 1")

#model migration with no pop split
migration_command=paste("-n 4 0.8 -n 5 0.1 -n 6 0.8 -m 6 2 10 -g 3 200 -ej 0.0075 5 4 -ej 0.01 4 3 -ej 0.011 6 3 -ej 0.02 3 1 -ej 0.12 2 1")

#nothing
#migration_command=paste("-n 4 0.8 -n 5 0.1 -n 6 0.8 -g 3 5.0 -ej 0.04 5 4 -ej 0.06 4 3 -ej 0.07 6 3 -ej 0.1 3 1 -ej 0.6 2 1")

mydat<-gendata(migration_command,npops,ninds)

Set up admix graph

Set Tree

leaves <- c("SH", "SL", "MH", "ML","PARV","MEX")
inner_nodes <- c("S", "M", "SM","PM","PX")
edges <- parent_edges(c(edge("SH", "S"),
                        edge("SL", "S"),
                        edge("ML", "M"),
                     #  admixture_edge("oMH", "MM","M", "α"),
                        edge("MH", "M"),                          
                        edge("M","SM"),
                    #   edge("MH","oMH"),
                        edge("S","SM"),
                        edge("PARV","PM"),
                        edge("SM","PM"),
                        edge("PM","PX"),
                        edge("MEX","PX")
                        ))
graph <- agraph(leaves, inner_nodes, edges)
make_an_outgroup(graph, "MEx")
plot(graph, show_inner_node_labels = TRUE, show_admixture_labels = TRUE)

Make list of trees

graph_list <- add_an_admixture(graph, "MIX")

Silhouette scores

Make distance matrix

nloci=dim(mydat)[2]
ibs_mat<-as.matrix(dist(mydat,method="manhattan",diag=TRUE,upper=TRUE)/nloci)
pops=c("PARV","MEX","ML","SL","SH","MH")
for_sil=data.frame(paste(1:(npops*ninds/2),"@",c(sapply(pops,function(j) rep(j,ninds/2))),sep=""),ibs_mat)
write.table(for_sil,file="sim_dist_matrix.txt",col.names=FALSE,row.names=FALSE,quote=FALSE,sep="\t")

Calculate silhoutte scores, make graphs

system("perl silhouette.pl -i sim_dist_matrix.txt -o temp -g")

Get individuals cut due to silhouette scores

#read in silscores
tempcut<-data.frame(read.table("temp_ind.sil",header=F),expand.grid(1:(ninds/2),1:npops))
colnames(tempcut)=c("pop","silscore","samplenum","popnum")

#get list of inds with silscores<0
cuts=sapply(1:npops, function(p) filter(tempcut,silscore<0 & popnum==p)$samplenum)
## Warning: package 'bindrcpp' was built under R version 3.3.2
#cut based on silhouette scores
keep<-sort(which(silhouette(kmeans(dist(mydat),npops)[[1]],dist(mydat))[,3]>0))

left_out<-which(!(1:(npops*ninds/2) %in% keep))

#####TESTING ONLY PLEASE DELETE
#####TESTING ONLY PLEASE DELETE
#left_out<-c(51:58)
#####TESTING ONLY PLEASE DELETE
#####TESTING ONLY PLEASE DELETE

cut_out<-paste(left_out,tempcut$pop[left_out])

if(length(left_out)>0){ 
  print(paste("The following",length(left_out),"individuals were removed:"))
  print(cut_out)
} else {
  print("Nobody cut.")
}
## [1] "The following 7 individuals were removed:"
## [1] "51 SL" "52 SL" "53 SL" "56 SL" "57 SL" "59 SL" "60 SL"
#make new data from cut
cutdat<-mydat[keep,] 
write.table(t(cutdat),file="~/crapcut.geno",quote=F,row.names=F,col.names=F,sep="")

Allele counts

Allele counts without silhouette cuts

allele_counts=list()
allele_freqs=list()
cumulative=0
rmpop=as.numeric()
for(i in 1:(npops)){
  sample=1:(ninds/2)
  if(length(sample)==0){  
      allele_counts[[i]]=paste(0,0,sep=",")
      allele_freqs[[i]]=paste(0,0,sep=",")
      rmpop=c(rmpop,i)
  } else if(length(sample)==1) {
    allele_counts[[i]]=paste(mydat[cumulative+sample,],2*length(sample)-mydat[cumulative+sample,],sep=",")
    allele_freqs[[i]]=mydat[cumulative+sample,]/(2*length(sample))
  }
  else{
    allele_counts[[i]]=paste(colSums(mydat[cumulative+sample,]),2*length(sample)-colSums(mydat[cumulative+sample,]),sep=",")
    allele_freqs[[i]]=colSums(mydat[cumulative+sample,])/(2*length(sample))
  }
  cumulative=cumulative+max(sample)
}

Allele counts with silhouette cuts

cut_allele_counts=list()
cut_allele_freqs=list()
cum=0
rmpop=as.numeric()
if(length(left_out)==0){ 
  cut_allele_counts=allele_counts
  cut_allele_freqs=allele_freqs
} else{
  for(i in 1:(npops)){
    sample=which(1:(ninds/2) %!in% cuts[[i]])
    if(length(sample)==0){  
      cut_allele_counts[[i]]=paste(0,0,sep=",")
      cut_allele_freqs[[i]]=paste(0,0,sep=",")
      rmpop=c(rmpop,i)
    } else if(length(sample)==1) {
      cut_allele_counts[[i]]=paste(mydat[cum+sample,],2*length(sample)-mydat[cum+sample,],sep=",")
      cut_allele_freqs[[i]]=mydat[cum+sample,]/(2*length(sample))
    }
    else{
      cut_allele_counts[[i]]=paste(colSums(mydat[cum+sample,]),2*length(sample)-colSums(mydat[cum+sample,]),sep=",")
      cut_allele_freqs[[i]]=colSums(mydat[cum+sample,])/(2*length(sample))
    }
    cum=cum+max(sample)
  }
}

#test: 
#sapply(1:6, function(p) sum(allele_counts[[p]]!=cut_allele_counts[[p]]))

Structure Plots

Uncut

#structure on original
obj.snmf<-snmf("~/crap.geno",K = kpops, alpha = 10, project = "new") 
## The project is saved into :
##   
## 
## To load the project, use:
##  project = load.snmfProject("")
## 
## To remove the project, use:
##  remove.snmfProject("")
## 
## [1] "*************************************"
## [1] "* sNMF K = 6  repetition 1      *"
## [1] "*************************************"
## summary of the options:
## 
##         -n (number of individuals)             60
##         -L (number of loci)                    30002
##         -K (number of ancestral pops)          6
##         -x (input file)                        /Users/jri/crap.geno
##         -q (individual admixture file)         /Users/jri/crap.snmf/K6/run1/crap_r1.6.Q
##         -g (ancestral frequencies file)        /Users/jri/crap.snmf/K6/run1/crap_r1.6.G
##         -i (number max of iterations)          200
##         -a (regularization parameter)          10
##         -s (seed random init)                  4627730093030073727
##         -e (tolerance error)                   1E-05
##         -p (number of processes)               1
##         - diploid
## 
## Read genotype file /Users/jri/crap.geno:     OK.
## 
## 
## Main algorithm:
##  [                                                                           ]
##  [=======================]
## Number of iterations: 62
## 
## Least-square error: 265607.477285
## Write individual ancestry coefficient file /Users/jri/crap.snmf/K6/run1/crap_r1.6.Q:     OK.
## Write ancestral allele frequency coefficient file /Users/jri/crap.snmf/K6/run1/crap_r1.6.G:  OK.
## 
## The project is saved into :
##   
## 
## To load the project, use:
##  project = load.snmfProject("")
## 
## To remove the project, use:
##  remove.snmfProject("")
qmatrix = Q(obj.snmf, K = kpops)
bobuncut=gather(data.frame(t(qmatrix)),dude,qval,1:(nrow(qmatrix))) %>%
mutate(pop=factor(rep(1:kpops,nrow(qmatrix))),dude=factor(as.numeric(gsub("X","",dude))))

Cut

Plots

blank2=c(rep("",8))
blank1=c(rep("",1))
poplabs=c(blank1,"PA",blank2,blank1,"ME",blank2,blank1,"ML",blank2,blank1,"SL",blank2,blank1,"SH",blank2,blank1,"MH",blank2)

uncut<-ggplot(bobuncut,aes(x=dude,y=qval,fill=pop))+
    geom_bar(stat="identity")+
    theme(
      axis.text.y=element_blank(),
      axis.ticks.y=element_blank(),
      axis.title.x=element_blank(),
      axis.title.y=element_blank()
    )+
    ggtitle("ADMIXTURE")+
    scale_fill_manual(guide=FALSE,values=viridis(npops))+
    scale_x_discrete(labels=poplabs)+
    theme(axis.text=element_text(size=32),plot.title = element_text(size = 40, face = "bold"))

cut<-ggplot(bobcut,aes(x=dude,y=qval,fill=pop))+
  geom_bar(stat="identity")+
  ggtitle("silhouette cuts")+
      theme(axis.text=element_text(size=32),plot.title = element_text(size = 40, face = "bold"))+
  theme(axis.text.y=element_blank(), axis.ticks.y=element_blank(),
        axis.title.x=element_blank(), axis.title.y=element_blank())+
  scale_fill_manual(guide=FALSE,values=viridis(npops)) +    scale_x_discrete(labels=poplabs)



bob=list(cut,uncut)
graphlist <- replicate(10, qplot(1,1), simplify = FALSE)

plots<-plot_grid(uncut,cut)
print( plot_grid(plots,ncol=1, rel_heights=c(0.3, 1)))

Treemix

Uncut

treemix<-data.frame(matrix(unlist(allele_counts),ncol=npops,byrow=F))
colnames(treemix)=c("PARV","MEX","ML","SL","SH","MH")
write.table(treemix,"treemix_input.txt",quote=F,row.name=F)
system("gzip -f treemix_input.txt")

Make tree

system("~/src/treemix/src/treemix -i ~/gdrive/sandbox/silhouette/treemix_input.txt.gz -o treemix_out -m 1 -root MEX")
plot_tree("~/gdrive/sandbox/silhouette/treemix_out")
## Warning in plot_tree("~/gdrive/sandbox/silhouette/treemix_out"): NAs
## introduced by coercion

## Warning in plot_tree("~/gdrive/sandbox/silhouette/treemix_out"): NAs
## introduced by coercion

## Warning in plot_tree("~/gdrive/sandbox/silhouette/treemix_out"): NAs
## introduced by coercion

## Warning in plot_tree("~/gdrive/sandbox/silhouette/treemix_out"): NAs
## introduced by coercion
##    V1   V2       V3      V4      V5 V6 V7 V8 V9 V10
## 1   1  MEX NOT_ROOT NOT_MIG     TIP 53 NA NA NA  NA
## 2   2 <NA> NOT_ROOT NOT_MIG NOT_TIP 32 52  2 31   1
## 3   3   SL NOT_ROOT NOT_MIG     TIP 52 NA NA NA  NA
## 4   4   ML NOT_ROOT NOT_MIG     TIP 32 NA NA NA  NA
## 5  15 PARV NOT_ROOT NOT_MIG     TIP 16 NA NA NA  NA
## 6  16 <NA> NOT_ROOT NOT_MIG NOT_TIP 53 15  1 32   4
## 7  31   MH NOT_ROOT NOT_MIG     TIP  2 NA NA NA  NA
## 8  32 <NA> NOT_ROOT NOT_MIG NOT_TIP 16  4  1  2   3
## 9  51   SH NOT_ROOT NOT_MIG     TIP 52 NA NA NA  NA
## 10 52 <NA> NOT_ROOT NOT_MIG NOT_TIP  2  3  1 51   1
## 11 53 <NA>     ROOT NOT_MIG NOT_TIP 53 16  5  1   1
## 12 54 <NA> NOT_ROOT     MIG NOT_TIP 53  1 NA NA  NA
##                                                                                                                                            V11
## 1                                                                                                                                MEX:0.0154882
## 2                                                                         ((SL:0.00113466,SH:0.00854118):0.00104406,MH:0.00361164):0.000173257
## 3                                                                                                                                SL:0.00113466
## 4                                                                                                                                ML:0.00378894
## 5                                                                                                                              PARV:0.00379582
## 6                   (PARV:0.00379582,(ML:0.00378894,((SL:0.00113466,SH:0.00854118):0.00104406,MH:0.00361164):0.000173257):0.0271239):0.0154882
## 7                                                                                                                                MH:0.00361164
## 8                                               (ML:0.00378894,((SL:0.00113466,SH:0.00854118):0.00104406,MH:0.00361164):0.000173257):0.0271239
## 9                                                                                                                                SH:0.00854118
## 10                                                                                                    (SL:0.00113466,SH:0.00854118):0.00104406
## 11 ((PARV:0.00379582,(ML:0.00378894,((SL:0.00113466,SH:0.00854118):0.00104406,MH:0.00361164):0.000173257):0.0271239):0.0154882,MEX:0.0154882);
## 12                                                                                                                                        <NA>
##             x          y      ymin      ymax
## 1  0.01548820 0.08333333 0.0000000 0.1666667
## 2  0.04278536 0.33333333 0.1666667 0.6666667
## 3  0.04496408 0.58333333 0.5000000 0.6666667
## 4  0.04640104 0.75000000 0.6666667 0.8333333
## 5  0.01928402 0.91666667 0.8333333 1.0000000
## 6  0.01548820 0.83333333 0.1666667 1.0000000
## 7  0.04537879 0.25000000 0.1666667 0.3333333
## 8  0.04261210 0.66666667 0.1666667 0.8333333
## 9  0.05237060 0.41666667 0.3333333 0.5000000
## 10 0.04382942 0.50000000 0.3333333 0.6666667
## 11 0.00000000 0.16666667 0.0000000 1.0000000
## 12 0.01548820         NA 0.0000000 0.1666667
## [1] "1 0 0.0154882"

## [1] 0.01548820 0.04496408 0.04640104 0.01928402 0.04537879 0.05237060
## [1] 0.003
## [1] "mse 0.0001483573"
## $d
##    V1   V2       V3      V4      V5 V6 V7 V8 V9 V10
## 1   1  MEX NOT_ROOT NOT_MIG     TIP 53 NA NA NA  NA
## 2   2 <NA> NOT_ROOT NOT_MIG NOT_TIP 32 52  2 31   1
## 3   3   SL NOT_ROOT NOT_MIG     TIP 52 NA NA NA  NA
## 4   4   ML NOT_ROOT NOT_MIG     TIP 32 NA NA NA  NA
## 5  15 PARV NOT_ROOT NOT_MIG     TIP 16 NA NA NA  NA
## 6  16 <NA> NOT_ROOT NOT_MIG NOT_TIP 53 15  1 32   4
## 7  31   MH NOT_ROOT NOT_MIG     TIP  2 NA NA NA  NA
## 8  32 <NA> NOT_ROOT NOT_MIG NOT_TIP 16  4  1  2   3
## 9  51   SH NOT_ROOT NOT_MIG     TIP 52 NA NA NA  NA
## 10 52 <NA> NOT_ROOT NOT_MIG NOT_TIP  2  3  1 51   1
## 11 53 <NA>     ROOT NOT_MIG NOT_TIP 53 16  5  1   1
## 12 54 <NA> NOT_ROOT     MIG NOT_TIP 53  1 NA NA  NA
##                                                                                                                                            V11
## 1                                                                                                                                MEX:0.0154882
## 2                                                                         ((SL:0.00113466,SH:0.00854118):0.00104406,MH:0.00361164):0.000173257
## 3                                                                                                                                SL:0.00113466
## 4                                                                                                                                ML:0.00378894
## 5                                                                                                                              PARV:0.00379582
## 6                   (PARV:0.00379582,(ML:0.00378894,((SL:0.00113466,SH:0.00854118):0.00104406,MH:0.00361164):0.000173257):0.0271239):0.0154882
## 7                                                                                                                                MH:0.00361164
## 8                                               (ML:0.00378894,((SL:0.00113466,SH:0.00854118):0.00104406,MH:0.00361164):0.000173257):0.0271239
## 9                                                                                                                                SH:0.00854118
## 10                                                                                                    (SL:0.00113466,SH:0.00854118):0.00104406
## 11 ((PARV:0.00379582,(ML:0.00378894,((SL:0.00113466,SH:0.00854118):0.00104406,MH:0.00361164):0.000173257):0.0271239):0.0154882,MEX:0.0154882);
## 12                                                                                                                                        <NA>
##             x          y      ymin      ymax
## 1  0.01548820 0.08333333 0.0000000 0.1666667
## 2  0.04278536 0.33333333 0.1666667 0.6666667
## 3  0.04496408 0.58333333 0.5000000 0.6666667
## 4  0.04640104 0.75000000 0.6666667 0.8333333
## 5  0.01928402 0.91666667 0.8333333 1.0000000
## 6  0.01548820 0.83333333 0.1666667 1.0000000
## 7  0.04537879 0.25000000 0.1666667 0.3333333
## 8  0.04261210 0.66666667 0.1666667 0.8333333
## 9  0.05237060 0.41666667 0.3333333 0.5000000
## 10 0.04382942 0.50000000 0.3333333 0.6666667
## 11 0.00000000 0.16666667 0.0000000 1.0000000
## 12 0.01548820 0.08333333 0.0000000 0.1666667
## 
## $e
##    V1 V2          V3       V4      V5 V6       V7
## 1  16 15 0.003795820 1.000000 NOT_MIG  0 1.000000
## 2   2 52 0.001044060 1.000000 NOT_MIG  0 1.000000
## 3  52  3 0.001134660 1.000000 NOT_MIG  0 1.000000
## 4  52 51 0.008541180 1.000000 NOT_MIG  0 1.000000
## 5  16 32 0.027123900 1.000000 NOT_MIG  0 1.000000
## 6  53 16 0.015488200 1.000000 NOT_MIG  0 1.000000
## 7  53 54 0.015488200 1.000000 NOT_MIG  0 1.000000
## 8  54  1 0.000000000 1.000000 NOT_MIG  1 1.000000
## 9  54 31 0.000000000 0.152606     MIG  1 0.000000
## 10 32  4 0.003788940 1.000000 NOT_MIG  0 1.000000
## 11 32  2 0.000173257 1.000000 NOT_MIG  0 1.000000
## 12  2 31 0.002593434 0.847394 NOT_MIG  0 0.718077

Cut

treemix<-data.frame(matrix(unlist(cut_allele_counts),ncol=npops,byrow=F))
colnames(treemix)=c("PARV","MEX","ML","SL","SH","MH")
write.table(treemix,"treemix_cut_input.txt",quote=F,row.name=F)
system("gzip -f treemix_cut_input.txt")

Make tree

system("~/src/treemix/src/treemix -i ~/gdrive/sandbox/silhouette/treemix_cut_input.txt.gz -o treemix_cut_out -m 1 -root MEX")
plot_tree("~/gdrive/sandbox/silhouette/treemix_cut_out")
## Warning in plot_tree("~/gdrive/sandbox/silhouette/treemix_cut_out"): NAs
## introduced by coercion

## Warning in plot_tree("~/gdrive/sandbox/silhouette/treemix_cut_out"): NAs
## introduced by coercion

## Warning in plot_tree("~/gdrive/sandbox/silhouette/treemix_cut_out"): NAs
## introduced by coercion

## Warning in plot_tree("~/gdrive/sandbox/silhouette/treemix_cut_out"): NAs
## introduced by coercion
##    V1   V2       V3      V4      V5 V6 V7 V8 V9 V10
## 1   0 <NA> NOT_ROOT NOT_MIG NOT_TIP  2 15  1 32   3
## 2   1 PARV NOT_ROOT NOT_MIG     TIP  2 NA NA NA  NA
## 3   2 <NA> NOT_ROOT NOT_MIG NOT_TIP 53  1  1  0   4
## 4   3  MEX NOT_ROOT NOT_MIG     TIP 53 NA NA NA  NA
## 5   4   MH NOT_ROOT NOT_MIG     TIP 32 NA NA NA  NA
## 6  15   ML NOT_ROOT NOT_MIG     TIP  0 NA NA NA  NA
## 7  31   SL NOT_ROOT NOT_MIG     TIP 52 NA NA NA  NA
## 8  32 <NA> NOT_ROOT NOT_MIG NOT_TIP  0 52  2  4   1
## 9  51   SH NOT_ROOT NOT_MIG     TIP 52 NA NA NA  NA
## 10 52 <NA> NOT_ROOT NOT_MIG NOT_TIP 32 31  1 51   1
## 11 53 <NA>     ROOT NOT_MIG NOT_TIP 53  2  5  3   1
## 12 54 <NA> NOT_ROOT     MIG NOT_TIP 53  3 NA NA  NA
##                                                                                                                                           V11
## 1                                               (ML:0.00389898,((SL:0.00115514,SH:0.00852224):0.000940277,MH:0.003726):0.000163241):0.0265404
## 2                                                                                                                             PARV:0.00456124
## 3                   (PARV:0.00456124,(ML:0.00389898,((SL:0.00115514,SH:0.00852224):0.000940277,MH:0.003726):0.000163241):0.0265404):0.0136373
## 4                                                                                                                               MEX:0.0136373
## 5                                                                                                                                 MH:0.003726
## 6                                                                                                                               ML:0.00389898
## 7                                                                                                                               SL:0.00115514
## 8                                                                         ((SL:0.00115514,SH:0.00852224):0.000940277,MH:0.003726):0.000163241
## 9                                                                                                                               SH:0.00852224
## 10                                                                                                  (SL:0.00115514,SH:0.00852224):0.000940277
## 11 ((PARV:0.00456124,(ML:0.00389898,((SL:0.00115514,SH:0.00852224):0.000940277,MH:0.003726):0.000163241):0.0265404):0.0136373,MEX:0.0136373);
## 12                                                                                                                                       <NA>
##             x          y      ymin      ymax
## 1  0.04017770 0.66666667 0.1666667 0.8333333
## 2  0.01819854 0.91666667 0.8333333 1.0000000
## 3  0.01363730 0.83333333 0.1666667 1.0000000
## 4  0.01363730 0.08333333 0.0000000 0.1666667
## 5  0.04291548 0.25000000 0.1666667 0.3333333
## 6  0.04407668 0.75000000 0.6666667 0.8333333
## 7  0.04243636 0.58333333 0.5000000 0.6666667
## 8  0.04034094 0.33333333 0.1666667 0.6666667
## 9  0.04980346 0.41666667 0.3333333 0.5000000
## 10 0.04128122 0.50000000 0.3333333 0.6666667
## 11 0.00000000 0.16666667 0.0000000 1.0000000
## 12 0.01363730         NA 0.0000000 0.1666667
## [1] "1 0 0.0136373"

## [1] 0.01819854 0.01363730 0.04291548 0.04407668 0.04243636 0.04980346
## [1] 0.003
## [1] "mse 0.000173432172222222"
## $d
##    V1   V2       V3      V4      V5 V6 V7 V8 V9 V10
## 1   0 <NA> NOT_ROOT NOT_MIG NOT_TIP  2 15  1 32   3
## 2   1 PARV NOT_ROOT NOT_MIG     TIP  2 NA NA NA  NA
## 3   2 <NA> NOT_ROOT NOT_MIG NOT_TIP 53  1  1  0   4
## 4   3  MEX NOT_ROOT NOT_MIG     TIP 53 NA NA NA  NA
## 5   4   MH NOT_ROOT NOT_MIG     TIP 32 NA NA NA  NA
## 6  15   ML NOT_ROOT NOT_MIG     TIP  0 NA NA NA  NA
## 7  31   SL NOT_ROOT NOT_MIG     TIP 52 NA NA NA  NA
## 8  32 <NA> NOT_ROOT NOT_MIG NOT_TIP  0 52  2  4   1
## 9  51   SH NOT_ROOT NOT_MIG     TIP 52 NA NA NA  NA
## 10 52 <NA> NOT_ROOT NOT_MIG NOT_TIP 32 31  1 51   1
## 11 53 <NA>     ROOT NOT_MIG NOT_TIP 53  2  5  3   1
## 12 54 <NA> NOT_ROOT     MIG NOT_TIP 53  3 NA NA  NA
##                                                                                                                                           V11
## 1                                               (ML:0.00389898,((SL:0.00115514,SH:0.00852224):0.000940277,MH:0.003726):0.000163241):0.0265404
## 2                                                                                                                             PARV:0.00456124
## 3                   (PARV:0.00456124,(ML:0.00389898,((SL:0.00115514,SH:0.00852224):0.000940277,MH:0.003726):0.000163241):0.0265404):0.0136373
## 4                                                                                                                               MEX:0.0136373
## 5                                                                                                                                 MH:0.003726
## 6                                                                                                                               ML:0.00389898
## 7                                                                                                                               SL:0.00115514
## 8                                                                         ((SL:0.00115514,SH:0.00852224):0.000940277,MH:0.003726):0.000163241
## 9                                                                                                                               SH:0.00852224
## 10                                                                                                  (SL:0.00115514,SH:0.00852224):0.000940277
## 11 ((PARV:0.00456124,(ML:0.00389898,((SL:0.00115514,SH:0.00852224):0.000940277,MH:0.003726):0.000163241):0.0265404):0.0136373,MEX:0.0136373);
## 12                                                                                                                                       <NA>
##             x          y      ymin      ymax
## 1  0.04017770 0.66666667 0.1666667 0.8333333
## 2  0.01819854 0.91666667 0.8333333 1.0000000
## 3  0.01363730 0.83333333 0.1666667 1.0000000
## 4  0.01363730 0.08333333 0.0000000 0.1666667
## 5  0.04291548 0.25000000 0.1666667 0.3333333
## 6  0.04407668 0.75000000 0.6666667 0.8333333
## 7  0.04243636 0.58333333 0.5000000 0.6666667
## 8  0.04034094 0.33333333 0.1666667 0.6666667
## 9  0.04980346 0.41666667 0.3333333 0.5000000
## 10 0.04128122 0.50000000 0.3333333 0.6666667
## 11 0.00000000 0.16666667 0.0000000 1.0000000
## 12 0.01363730 0.08333333 0.0000000 0.1666667
## 
## $e
##    V1 V2          V3       V4      V5 V6       V7
## 1   2  1 0.004561240 1.000000 NOT_MIG  0 1.000000
## 2  32 52 0.000940277 1.000000 NOT_MIG  0 1.000000
## 3  52 31 0.001155140 1.000000 NOT_MIG  0 1.000000
## 4  52 51 0.008522240 1.000000 NOT_MIG  0 1.000000
## 5   2  0 0.026540400 1.000000 NOT_MIG  0 1.000000
## 6  53  2 0.013637300 1.000000 NOT_MIG  0 1.000000
## 7  53 54 0.013637300 1.000000 NOT_MIG  0 1.000000
## 8  54  3 0.000000000 1.000000 NOT_MIG  1 1.000000
## 9  54  4 0.000000000 0.168756     MIG  1 0.000000
## 10  0 15 0.003898980 1.000000 NOT_MIG  0 1.000000
## 11  0 32 0.000163241 1.000000 NOT_MIG  0 1.000000
## 12 32  4 0.002574542 0.831244 NOT_MIG  0 0.690967

Admixture Graphs

Get Data

system("~/src/treemix/src/fourpop -i ~/gdrive/sandbox/silhouette/treemix_input.txt.gz -k 100 | grep -Ev 'Estimating|total|npop' | sed -e 's/,/ /g;s/;/ /' > ./for_admix.txt")
OG<-read.table("./for_admix.txt",header=F) 
colnames(OG)=c("W","X","Y","Z","D","seD","Z.value")
OG <- OG[,c(1:5,7)]
plot(f4stats(OG))

Silhouette cuts

system("~/src/treemix/src/fourpop -i ~/gdrive/sandbox/silhouette/treemix_cut_input.txt.gz -k 100 | grep -Ev 'Estimating|total|npop' | sed -e 's/,/ /g;s/;/ /' > ./for_admix_cut.txt")
cutdat<-read.table("./for_admix_cut.txt",header=F) 
colnames(cutdat)=c("W","X","Y","Z","D","seD","Z.value")
cutdat <- cutdat[,c(1:5,7)]
plot(f4stats(cutdat))

Fit graphs, plot graph with lowest error. First on OG data:

ncores=3 #cpu to use
fits=fit_graph_list(OG,graph_list,ncores)
fits_errors=pbsapply(1:length(fits), function(i) (fits[[i]]$best_error))
plot(fits[[which(fits_errors==min(fits_errors))]]$graph)

plot(fit_graph(OG,fits[[which(fits_errors==min(fits_errors))]]$graph))

plot(fit_graph(OG,graph))

Then on silhouette scores:

ncores=3 #cpu to use
fits=fit_graph_list(cutdat,graph_list,ncores)
fits_errors=pbsapply(1:length(fits), function(i) (fits[[i]]$best_error))
plot(fits[[which(fits_errors==min(fits_errors))]]$graph)

plot(fit_graph(cutdat,fits[[which(fits_errors==min(fits_errors))]]$graph))

plot(fit_graph(cutdat,graph))