Loading libraries

library(tidyverse)
library(cowplot)
library(qtl)

Phasing teosinte parents with Eagle2

I have: imputed GBS data for for ~50 families (share same maternal parent) in .vcf format

I will use: Eagle 2 with cohort phasing (instead of known hapmap data)

For Eagle2 I need:

I have recombination events for the individuals from Teosinte populations

From Peter Bradbury’s imputation and phasing

xo<-read.table(file="xo_ZeaGBSv27raw_RareAllelesC2TeoCurated_depth_AGPv4_filtered1128.txt",
               header=TRUE,stringsAsFactors=FALSE)
dim(xo)
## [1] 131097      4
head(xo)
##   taxon chr  start     end
## 1 A0151   1 337863  369777
## 2 A0415   1 337863  369777
## 3 A1190   1 337863  369777
## 4 A2718   1 337863  369777
## 5 A0134   1 337863 1035713
## 6 A0557   1 334927 1035713

And parent offspring relationships

parentage<-read.table(file="Parentage_for_imputeR.csv",
                      header=TRUE,stringsAsFactors=FALSE,sep=",")
head(parentage)
##    Taxa     Mother     Father
## 1 A0003 PC_N14_ID2 PC_N14_ID2
## 2 A0004 PC_J48_ID2 PC_J48_ID2
## 3 A0008 PC_J13_ID1 PC_K60_ID1
## 4 A0013 PC_N10_ID2 PC_N10_ID2
## 5 A0014 PC_K60_ID1 PC_K60_ID1
## 6 A0015 PC_J10_ID1 PC_M15_ID1
moms<-as.character(levels(as.factor(parentage$Mother)))
length(moms)
## [1] 51

Recombination events along chromosome 1 (Teosinte)

 plot.chr1<-ggplot(filter(xo,chr==1),aes(x=start))+geom_histogram()
 ggdraw()+draw_plot(plot.chr1)+draw_plot_label("Chr1", size = 15)

Making a genetic Map for Chromosome 1 (resolution: 20 Mb)

 window.size<-2e7
 current.chr<-1
 max.end<-max(filter(xo,chr==current.chr)$end)
 
 my.breaks<-seq(from=0,to=max.end,by=window.size)
 if(max(my.breaks)<max.end){ my.breaks<-c(my.breaks,max.end)}
 #length(my.breaks)
 #max(my.breaks)
 chr1.table<-hist(xo[xo$chr==1,]$end,breaks = my.breaks,plot=FALSE)  
 head(chr1.table$counts)
## [1] 3609 1543 1122  862  525  606
 head(chr1.table$breaks)
## [1] 0e+00 2e+07 4e+07 6e+07 8e+07 1e+08
  num.individuals<-length(unique(xo$taxon))
  num.individuals
## [1] 4705
  prob.recombination<- 
    chr1.table$counts/(2*num.individuals)
  
  #prob.recombination<- 
  #  chr1.table$counts/sum(chr1.table$counts)
  
  
  # pos<- 50 * log(1/(1-2*prob.recombination)) #no more Haldanes map funciton needed
  pos<-prob.recombination*100
  
  head(pos)
## [1] 38.352816 16.397450 11.923486  9.160468  5.579171  6.439957
  for (i in 2:length(pos)) {
    pos[i]=pos[i]+pos[i-1]
  }
  pos<-c(0,pos) #adding the start of the chromosome
  tab <- data.frame(chr=rep(1,length(pos)),
                  pos=pos)

  rownames(tab) <- paste0(as.character(1),"_", round(my.breaks/1e6,digits=0),"Mb")
  

  map <- table2map(tab)
  head(map)
##     1_0Mb    1_20Mb    1_40Mb    1_60Mb    1_80Mb   1_100Mb   1_120Mb 
##   0.00000  38.35282  54.75027  66.67375  75.83422  81.41339  87.85335 
##   1_140Mb   1_160Mb   1_180Mb   1_200Mb   1_220Mb   1_240Mb   1_260Mb 
##  90.17003  93.33688  97.33262 107.16259 118.72476 131.47715 147.34325 
##   1_280Mb   1_300Mb   1_307Mb 
## 160.23379 189.81934 202.44421

Plotting with R/Qtl

  plotMap(map,chr=c(1), show.marker.names=TRUE,
          main="Chromosome 1 teosinte")

Loading maize genetic map from Ogut et al. 2015

OgutMap<-read.table(file="ogut_fifthcM_map_agpv4.txt",
                      header=FALSE,stringsAsFactors=FALSE)
names(OgutMap)<-c("marker.name.1","marker.name.2","cM","chromosome","position")
head(OgutMap)
##   marker.name.1 marker.name.2   cM chromosome position
## 1       S_88897            M1 -4.8          1   134914
## 2      S_170386            M2 -4.6          1   223935
## 3      S_251876            M3 -4.4          1   350753
## 4      S_333365            M4 -4.2          1   380021
## 5      S_496344            M6 -3.8          1   541304
## 6      S_577834            M7 -3.6          1   612144
dim(OgutMap)
## [1] 6740    5
dim(OgutMap[OgutMap$chromosome==1,])
## [1] 985   5

finding markers similar to our simulated markers

 window.size<-2e7
 chromo.keep<-c()
 position.keep<-c()
 cM.keep<-c()
for(chr in 1:10) {
  current.chromosome<-OgutMap[OgutMap$chromosome==chr,]
  current.chromosome.min.cM<-min(current.chromosome$cM)
  current.max=max(current.chromosome$position) 
  max.limit<-(abs(current.max/window.size)+1)*window.size
  current.breaks=seq(from=0,to=max.limit,by=window.size)
  
  for (j in 1:length(current.breaks)) {
    best.index<-which.min(abs(current.chromosome$position - current.breaks[j])) 
    #print(best.index)
    chromo.keep<-c(chromo.keep,chr)
    position.keep<-c(position.keep,current.chromosome[best.index,"position"])
    cM.keep<-c( cM.keep,current.chromosome[best.index,"cM"]+abs(current.chromosome.min.cM))
  }
  
}
 
 roughOgutData<-data.frame(chr=chromo.keep,pos=cM.keep,phys_pos=position.keep)
 head(roughOgutData)
##   chr  pos phys_pos
## 1   1  0.0   134914
## 2   1 40.6 19912274
## 3   1 62.2 39965698
## 4   1 76.2 60072613
## 5   1 84.8 79738562
## 6   1 91.8 99413055
 tail(roughOgutData)
##     chr   pos  phys_pos
## 116  10  42.8  63639269
## 117  10  45.6  80508347
## 118  10  49.6 100056068
## 119  10  53.4 120024729
## 120  10  72.4 139943108
## 121  10 112.8 150873537
 rounded.phys.pos<-round(roughOgutData$phys_pos/1e6,digits=0)
 rownames(roughOgutData) <- paste0(as.character(chromo.keep),"_",  rounded.phys.pos,"Mb")
 
 
  rough.ogut.map <- table2map(roughOgutData)

Plotting a maize genetic map for Chr1 from Ogut et al. data

  plotMap(rough.ogut.map,chr=c(1),show.marker.names=TRUE,main="Chromosome 1 maize")

One besides the other

Comparing Maps with R/Qtl function

left map is Maize (from Ogut et. al), right map is from Teosinte

  plotMap(rough.ogut.map,map,chr=c(1),show.marker.names=TRUE,shift=TRUE)

  tab[tab$chr==1,]
##         chr       pos
## 1_0Mb     1   0.00000
## 1_20Mb    1  38.35282
## 1_40Mb    1  54.75027
## 1_60Mb    1  66.67375
## 1_80Mb    1  75.83422
## 1_100Mb   1  81.41339
## 1_120Mb   1  87.85335
## 1_140Mb   1  90.17003
## 1_160Mb   1  93.33688
## 1_180Mb   1  97.33262
## 1_200Mb   1 107.16259
## 1_220Mb   1 118.72476
## 1_240Mb   1 131.47715
## 1_260Mb   1 147.34325
## 1_280Mb   1 160.23379
## 1_300Mb   1 189.81934
## 1_307Mb   1 202.44421
  roughOgutData[roughOgutData$chr==1,]
##         chr   pos  phys_pos
## 1_0Mb     1   0.0    134914
## 1_20Mb    1  40.6  19912274
## 1_40Mb    1  62.2  39965698
## 1_60Mb    1  76.2  60072613
## 1_80Mb    1  84.8  79738562
## 1_99Mb    1  91.8  99413055
## 1_120Mb   1  93.8 119583370
## 1_132Mb   1  94.0 131929986
## 1_161Mb   1  95.8 160996844
## 1_180Mb   1 100.4 180044375
## 1_200Mb   1 115.0 199956280
## 1_220Mb   1 131.8 220202926
## 1_240Mb   1 144.0 239783178
## 1_260Mb   1 155.6 259906710
## 1_280Mb   1 171.2 280012317
## 1_300Mb   1 198.2 300092579
## 1_307Mb   1 210.2 306879533

Now working with all 10 chromosomes

 window.size<-2e7
 chromo.keep<-c()
 position.keep<-c()
 cM.keep<-c() 
 for (current.chr in 1:10) {

   max.end<-max(filter(xo,chr==current.chr)$end)
   
   my.breaks<-seq(from=0,to=max.end,by=window.size)
   if(max(my.breaks)<max.end){ my.breaks<-c(my.breaks,max.end)}
   current.chr.table<-hist(xo[xo$chr==current.chr,]$end,breaks = my.breaks,plot=FALSE)  
   #prob.recombination<-current.chr.table$counts/sum(current.chr.table$counts)
   prob.recombination<- 
   current.chr.table$counts/(2*num.individuals)
   
   #pos<- 50 * log(1/(1-2*prob.recombination))
   pos<-prob.recombination*100
   for (i in 2:length(pos)) {
    pos[i]=pos[i]+pos[i-1]
   }
   pos<-c(0,pos) #adding an start of the chromosome
   chromo.keep<-c(chromo.keep,rep(current.chr,length(pos)))
   cM.keep<-c(cM.keep,pos)
   position.keep<-c(position.keep,my.breaks)
   
 }
 
  roughTeoData<-data.frame(chr=chromo.keep,pos=cM.keep,phys_pos=position.keep)
  
  rownames(roughTeoData) <- paste0(as.character(roughTeoData$chr),"_",round(roughTeoData$phys_pos/1e6,digits=0),"Mb")
  head(roughTeoData)
##         chr      pos phys_pos
## 1_0Mb     1  0.00000    0e+00
## 1_20Mb    1 38.35282    2e+07
## 1_40Mb    1 54.75027    4e+07
## 1_60Mb    1 66.67375    6e+07
## 1_80Mb    1 75.83422    8e+07
## 1_100Mb   1 81.41339    1e+08
  tail(roughTeoData)
##          chr       pos  phys_pos
## 10_60Mb   10  34.08077  60000000
## 10_80Mb   10  37.64081  80000000
## 10_100Mb  10  42.13603 100000000
## 10_120Mb  10  45.51541 120000000
## 10_140Mb  10  65.47290 140000000
## 10_150Mb  10 102.09352 150357239

And plotting the map for the ten Teosinte chromosomes

  teoMap <- table2map(roughTeoData)
  plotMap(teoMap,show.marker.names=FALSE,main="Teo all")

Individual comparisons

for(my.chr in 1:10) {
  par(mfrow=c(1,2))
  plotMap(teoMap,chr=c(my.chr), show.marker.names=TRUE, main=paste0("Chromosome ",my.chr ," teosinte"))
  plotMap(rough.ogut.map,chr=c(my.chr),show.marker.names=TRUE,main=paste0("Chromosome ",my.chr ," maize"))
  
}

Total distance in cM

  max.per.chromosome.teo<-tapply(roughTeoData$pos,roughTeoData$chr,max)
  max.per.chromosome.teo
##        1        2        3        4        5        6        7        8 
## 202.4442 168.1296 143.8151 133.4644 145.7386 126.6738 132.2210 127.1095 
##        9       10 
## 111.4772 102.0935
  total.cM.teosinte<-sum(max.per.chromosome.teo)
  total.cM.teosinte
## [1] 1393.167
  ## former negative cM in Ogut's data have been adjusted to 0
  ## hence the final cM is larger here that in Ogut's
  max.per.chromosome.maize<-tapply(roughOgutData$pos,roughOgutData$chr,max)
  max.per.chromosome.maize
##     1     2     3     4     5     6     7     8     9    10 
## 210.2 160.0 161.4 151.8 157.0 111.8 138.4 137.4 130.2 112.8
  total.cM.Maize<-sum(max.per.chromosome.maize)
  total.cM.Maize
## [1] 1471