Functions

Create random haplotype

ran.hap=function(numloci,p){sapply(1:numloci,function(x) rbinom(1,1,p[x]))}

Copy haplotype from a mom

Assumes uniform recombination rate. Note recombination is not done correctly.

copy.mom=function(numloci,mom){ 
  rec=round(runif(1,2,numloci-1)) #recombination breakpoint
    k1.1=mom[[rbinom(1,1,.5)+1]][1:rec] #copy 1->rec from mom
    k1.2=mom[[rbinom(1,1,.5)+1]][(rec+1):numloci] #copy rec->numloci from mom
    k1=c(k1.1,k1.2)
    return(k1)
}

Get data from diploid mom

#gets diploid haps over range
haps<-function(mom,loci){
  return(list(mom[[1]][loci],mom[[2]][loci]))
}

#gets genotypes over range
gens<-function(mom,loci){
  return(sapply(loci,function(a) sum(sapply(mom,"[[",a))))
}

Create 1 kid

Error model assumes heterozygotes have equal likelihood of being called either homozygotes

kid=function(mom,dad,numloci,p,selfing,error){
    k1=copy.mom(numloci,mom)
  k2=copy.mom(numloci,dad)

    truekid=k1+k2
  hets_with_error=sample(which(truekid==1),round(het.error*length(which(truekid==1))))
    truekid=replace(truekid,hets_with_error,sample(c(0,2),length(hets_with_error),replace=T)  )
  return(truekid)
}

Likelihood of mom/dad/kid combo

like<-function(momallele,dad_genotype,kidgeno,probs){
  return(probs[[dad_genotype+1]][momallele+1,kidgeno+1])
}

Likelihood of repulsion or linkage phase

#locus and #prior_locus are the two loci of interest. usually from phase_loci
repolink<-function(locus,prior_locus,dad_genos,progeny,emom,rec,probs){ 
  kidgenos=list(sapply(progeny,"[[",prior_locus),sapply(progeny,"[[",locus)  ) #list with progeny genotypes at first het locus and at 2nd het locus
  config10<-function(kidgenos,dad_genos,locus,x) { like(1,dad_genos[[x]][prior_locus],kidgenos[[1]][x],probs)*like(0,dad_genos[[x]][locus],kidgenos[[2]][x],probs) } # x is each kid
  config11<-function(kidgenos,dad_genos,locus,x) { like(1,dad_genos[[x]][prior_locus],kidgenos[[1]][x],probs)*like(1,dad_genos[[x]][locus],kidgenos[[2]][x],probs) }
  config01<-function(kidgenos,dad_genos,locus,x) { like(0,dad_genos[[x]][prior_locus],kidgenos[[1]][x],probs)*like(1,dad_genos[[x]][locus],kidgenos[[2]][x],probs) }
  config00<-function(kidgenos,dad_genos,locus,x) { like(0,dad_genos[[x]][prior_locus],kidgenos[[1]][x],probs)*like(0,dad_genos[[x]][locus],kidgenos[[2]][x],probs) }
  
  repulsion=sapply(1:size.array,function(x) 
    max(
      (1-rec)*max( config10(kidgenos,dad_genos,locus,x),config01(kidgenos,dad_genos,locus,x) ),
      (rec)*max( config11(kidgenos,dad_genos,locus,x), config00(kidgenos,dad_genos,locus,x)  )
    )
  )
   
  link=sapply(1:size.array,function(x) 
    max(
      (rec)*max( config10(kidgenos,dad_genos,locus,x), config01(kidgenos,dad_genos,locus,x) ),
      (1-rec)*max( config11(kidgenos,dad_genos,locus,x), config00(kidgenos,dad_genos,locus,x)  )
    )
  )
     
  #where does mom have the 1
  mom1=which(sapply(emom,"[[",length(emom[[1]]))==1)
  mom0=which(sapply(emom,"[[",length(emom[[1]]))==0)

  if(prod(repulsion)>prod(link)){ 
    emom[[mom1]]=c(emom[[mom1]],0)
    emom[[mom0]]=c(emom[[mom0]],1)
    } else if( prod(repulsion)<prod(link)){ 
      emom[[mom1]]=c(emom[[mom1]],1)
      emom[[mom0]]=c(emom[[mom0]],0)
    } else{ #randomly picks when likelihoods are equal
      allele1=rbinom(1,1,.5)
      emom[[mom1]]=c(emom[[mom1]],allele1)
      emom[[mom0]]=c(emom[[mom0]],c(0,1)[which(c(0,1)!=allele1)])
    }
  return(emom)
}

Parameters and Probs

Number of kids, selfing rate, heterozygous->homozygous error, and number of numloci. For now assume:
* mom is known without error
* there is no homozygote->heterozygote error
* there is no homozygote->alt homozygote error

size.array=20
selfing=0.5
het.error=.70
dad.het.error=0.1
numloci=100

Here including heterozygote->homozygote error:

probs<-vector("list",3)
e=het.error
probs[[1]]<-matrix(c(1,0,0, e/2,1-e,e/2),byrow=TRUE,nrow=2,ncol=3 )
probs[[2]]<-matrix(c((e+2)/4,(1-e)/2,e/4, e/4,(1-e)/2,(e+2)/4),byrow=TRUE,nrow=2,ncol=3 )
probs[[3]]<-matrix(c(e/2,1-e,e/2, 0,0,1),byrow=TRUE,nrow=2,ncol=3 )

Create Progeny Array

Make an SFS

x=1:99/100 #all allele freqs in steps of 0.01
freq=1/x
sfs=as.numeric(); for(i in 1:99){sfs=c(sfs,rep(x[i],100*freq[i]))}
p=sample(sfs,numloci)

Make mom and dads

a1=ran.hap(numloci,p)
a2=ran.hap(numloci,p)

#noerror_mom=list(a1,a2)
#for(i in which(a1+a2==1)){ if(runif(1)<het.error{ a1[i]=sample(c(0,1),1); a2[i]=a1[i];}}#add error to mom
mom=list(a1,a2)

dad=vector("list",size.array)
dad=lapply(dad,function(a) if(runif(1)<selfing){ a=mom; } else{ a=list(ran.hap(numloci,p),ran.hap(numloci,p))  })
gendads=lapply(dad,function(a) gens(a,1:numloci)) #list of dads genotypes. can add error here later.
gendads=lapply(gendads,
      function(a) replace(a,sample(which(a==1),round(dad.het.error*length(which(a==1)))),sample(c(0,2),length(round(dad.het.error*length(which(a==1)))),replace=T))
      )

Make progeny array

progeny<-vector("list",size.array)
progeny<-lapply(1:size.array, function(a) kid(mom,dad[[a]],numloci,sfs,selfing,het.error))

Estimate mom

First we get the sets of numloci for which mom is heterozygous

phase_loci<-which(gens(mom,c(1:numloci))==1)

First we get true haplotypes and set first heterozygote phase in estimated mom

truemom=haps(mom,phase_loci)
emom=list(0,1)

Now we estimate likelihoods

for(i in 2:length(phase_loci)){
  emom=repolink(phase_loci[i],phase_loci[i-1],gendads,progeny,emom,0.5/numloci,probs)
}

How many differences between truemom and emom? Check both haps since order is arbitrary

min( length(phase_loci)-sum(truemom[[1]]==emom[[1]]), length(phase_loci)-sum(truemom[[1]]==emom[[2]]))
## [1] 0

Test

{r} # lengptm <- proc.time() # wronguns=as.numeric() # for(reps in 1:100){ # size.array=50 # selfing=0.5 # het.error=0.7 # numloci=1000 # # x=1:99/100 #all allele freqs in steps of 0.01 # freq=1/x # sfs=as.numeric(); for(i in 1:99){sfs=c(sfs,rep(x[i],100*freq[i]))} # p=sample(sfs,numloci) # # a1=ran.hap(numloci,p) # a2=ran.hap(numloci,p) # mom=list(a1,a2) # # dad=vector("list",size.array) # dad=lapply(dad,function(a) if(runif(1)<selfing){ a=mom; } else{ a=list(ran.hap(numloci,p),ran.hap(numloci,p)) }) # gendads=lapply(dad,function(a) gens(a,1:numloci)) #list of dads genotypes. can add error here later. # gendads=lapply(gendads,function(a) replace(a,sample(which(a==1),round(dad.het.error*length(which(a==1)))),sample(c(0,2),length(round(dad.het.error*length(which(a==1)))),replace=T))) # # progeny<-vector("list",size.array) # progeny<-lapply(1:size.array, function(a) kid(mom,dad[[a]],numloci,sfs,selfing,het.error)) # # phase_loci<-which(gens(mom,c(1:numloci))==1) # # truemom=haps(mom,phase_loci) # emom=list(0,1) # # for(i in 2:length(phase_loci)){ # emom=repolink(phase_loci[i],phase_loci[i-1],gendads,progeny,emom,0.5/numloci,probs) # } # # # min( length(phase_loci)-sum(truemom[[1]]==emom[[1]]), length(phase_loci)-sum(truemom[[1]]==emom[[2]])) # # # if(min( length(phase_loci)-sum(truemom[[1]]==emom[[1]]), length(phase_loci)-sum(truemom[[1]]==emom[[2]]))>0){ stop("bad one") } # wronguns[reps]=min( length(phase_loci)-sum(truemom[[1]]==emom[[1]]), length(phase_loci)-sum(truemom[[1]]==emom[[2]])) # } # wronguns # proc.time() - ptm #

Test code, ignore

{r} # fake_emom=function(emom,end){ # return(list(emom[[1]][1:end],emom[[2]][1:end])) # } # # #get kids genos for two subsequent markers # list(sapply(progeny,"[[",phase_loci[locus-1]),sapply(progeny,"[[",phase_loci[locus]) ) #