library(tidyverse)
library(cowplot)
library(qtl)
I have: imputed GBS data for for ~50 families (share same maternal parent) in .vcf format
I will use: Eagle 2 with cohort phasing (instead of known hapmap data)
For Eagle2 I need:
From Peter Bradbury’s imputation and phasing
xo<-read.table(file="xo_ZeaGBSv27raw_RareAllelesC2TeoCurated_depth_AGPv4_filtered1128.txt",
header=TRUE,stringsAsFactors=FALSE)
dim(xo)
## [1] 131097 4
head(xo)
## taxon chr start end
## 1 A0151 1 337863 369777
## 2 A0415 1 337863 369777
## 3 A1190 1 337863 369777
## 4 A2718 1 337863 369777
## 5 A0134 1 337863 1035713
## 6 A0557 1 334927 1035713
parentage<-read.table(file="Parentage_for_imputeR.csv",
header=TRUE,stringsAsFactors=FALSE,sep=",")
head(parentage)
## Taxa Mother Father
## 1 A0003 PC_N14_ID2 PC_N14_ID2
## 2 A0004 PC_J48_ID2 PC_J48_ID2
## 3 A0008 PC_J13_ID1 PC_K60_ID1
## 4 A0013 PC_N10_ID2 PC_N10_ID2
## 5 A0014 PC_K60_ID1 PC_K60_ID1
## 6 A0015 PC_J10_ID1 PC_M15_ID1
moms<-as.character(levels(as.factor(parentage$Mother)))
length(moms)
## [1] 51
plot.chr1<-ggplot(filter(xo,chr==1),aes(x=start))+geom_histogram()
ggdraw()+draw_plot(plot.chr1)+draw_plot_label("Chr1", size = 15)
window.size<-2e7
current.chr<-1
max.end<-max(filter(xo,chr==current.chr)$end)
my.breaks<-seq(from=0,to=max.end,by=window.size)
if(max(my.breaks)<max.end){ my.breaks<-c(my.breaks,max.end)}
#length(my.breaks)
#max(my.breaks)
chr1.table<-hist(xo[xo$chr==1,]$end,breaks = my.breaks,plot=FALSE)
head(chr1.table$counts)
## [1] 3609 1543 1122 862 525 606
head(chr1.table$breaks)
## [1] 0e+00 2e+07 4e+07 6e+07 8e+07 1e+08
num.individuals<-length(unique(xo$taxon))
num.individuals
## [1] 4705
prob.recombination<-
chr1.table$counts/(2*num.individuals)
#prob.recombination<-
# chr1.table$counts/sum(chr1.table$counts)
# pos<- 50 * log(1/(1-2*prob.recombination)) #no more Haldanes map funciton needed
pos<-prob.recombination*100
head(pos)
## [1] 38.352816 16.397450 11.923486 9.160468 5.579171 6.439957
for (i in 2:length(pos)) {
pos[i]=pos[i]+pos[i-1]
}
pos<-c(0,pos) #adding the start of the chromosome
tab <- data.frame(chr=rep(1,length(pos)),
pos=pos)
rownames(tab) <- paste0(as.character(1),"_", round(my.breaks/1e6,digits=0),"Mb")
map <- table2map(tab)
head(map)
## 1_0Mb 1_20Mb 1_40Mb 1_60Mb 1_80Mb 1_100Mb 1_120Mb
## 0.00000 38.35282 54.75027 66.67375 75.83422 81.41339 87.85335
## 1_140Mb 1_160Mb 1_180Mb 1_200Mb 1_220Mb 1_240Mb 1_260Mb
## 90.17003 93.33688 97.33262 107.16259 118.72476 131.47715 147.34325
## 1_280Mb 1_300Mb 1_307Mb
## 160.23379 189.81934 202.44421
plotMap(map,chr=c(1), show.marker.names=TRUE,
main="Chromosome 1 teosinte")
OgutMap<-read.table(file="ogut_fifthcM_map_agpv4.txt",
header=FALSE,stringsAsFactors=FALSE)
names(OgutMap)<-c("marker.name.1","marker.name.2","cM","chromosome","position")
head(OgutMap)
## marker.name.1 marker.name.2 cM chromosome position
## 1 S_88897 M1 -4.8 1 134914
## 2 S_170386 M2 -4.6 1 223935
## 3 S_251876 M3 -4.4 1 350753
## 4 S_333365 M4 -4.2 1 380021
## 5 S_496344 M6 -3.8 1 541304
## 6 S_577834 M7 -3.6 1 612144
dim(OgutMap)
## [1] 6740 5
dim(OgutMap[OgutMap$chromosome==1,])
## [1] 985 5
window.size<-2e7
chromo.keep<-c()
position.keep<-c()
cM.keep<-c()
for(chr in 1:10) {
current.chromosome<-OgutMap[OgutMap$chromosome==chr,]
current.chromosome.min.cM<-min(current.chromosome$cM)
current.max=max(current.chromosome$position)
max.limit<-(abs(current.max/window.size)+1)*window.size
current.breaks=seq(from=0,to=max.limit,by=window.size)
for (j in 1:length(current.breaks)) {
best.index<-which.min(abs(current.chromosome$position - current.breaks[j]))
#print(best.index)
chromo.keep<-c(chromo.keep,chr)
position.keep<-c(position.keep,current.chromosome[best.index,"position"])
cM.keep<-c( cM.keep,current.chromosome[best.index,"cM"]+abs(current.chromosome.min.cM))
}
}
roughOgutData<-data.frame(chr=chromo.keep,pos=cM.keep,phys_pos=position.keep)
head(roughOgutData)
## chr pos phys_pos
## 1 1 0.0 134914
## 2 1 40.6 19912274
## 3 1 62.2 39965698
## 4 1 76.2 60072613
## 5 1 84.8 79738562
## 6 1 91.8 99413055
tail(roughOgutData)
## chr pos phys_pos
## 116 10 42.8 63639269
## 117 10 45.6 80508347
## 118 10 49.6 100056068
## 119 10 53.4 120024729
## 120 10 72.4 139943108
## 121 10 112.8 150873537
rounded.phys.pos<-round(roughOgutData$phys_pos/1e6,digits=0)
rownames(roughOgutData) <- paste0(as.character(chromo.keep),"_", rounded.phys.pos,"Mb")
rough.ogut.map <- table2map(roughOgutData)
plotMap(rough.ogut.map,chr=c(1),show.marker.names=TRUE,main="Chromosome 1 maize")
left map is Maize (from Ogut et. al), right map is from Teosinte
plotMap(rough.ogut.map,map,chr=c(1),show.marker.names=TRUE,shift=TRUE)
tab[tab$chr==1,]
## chr pos
## 1_0Mb 1 0.00000
## 1_20Mb 1 38.35282
## 1_40Mb 1 54.75027
## 1_60Mb 1 66.67375
## 1_80Mb 1 75.83422
## 1_100Mb 1 81.41339
## 1_120Mb 1 87.85335
## 1_140Mb 1 90.17003
## 1_160Mb 1 93.33688
## 1_180Mb 1 97.33262
## 1_200Mb 1 107.16259
## 1_220Mb 1 118.72476
## 1_240Mb 1 131.47715
## 1_260Mb 1 147.34325
## 1_280Mb 1 160.23379
## 1_300Mb 1 189.81934
## 1_307Mb 1 202.44421
roughOgutData[roughOgutData$chr==1,]
## chr pos phys_pos
## 1_0Mb 1 0.0 134914
## 1_20Mb 1 40.6 19912274
## 1_40Mb 1 62.2 39965698
## 1_60Mb 1 76.2 60072613
## 1_80Mb 1 84.8 79738562
## 1_99Mb 1 91.8 99413055
## 1_120Mb 1 93.8 119583370
## 1_132Mb 1 94.0 131929986
## 1_161Mb 1 95.8 160996844
## 1_180Mb 1 100.4 180044375
## 1_200Mb 1 115.0 199956280
## 1_220Mb 1 131.8 220202926
## 1_240Mb 1 144.0 239783178
## 1_260Mb 1 155.6 259906710
## 1_280Mb 1 171.2 280012317
## 1_300Mb 1 198.2 300092579
## 1_307Mb 1 210.2 306879533
window.size<-2e7
chromo.keep<-c()
position.keep<-c()
cM.keep<-c()
for (current.chr in 1:10) {
max.end<-max(filter(xo,chr==current.chr)$end)
my.breaks<-seq(from=0,to=max.end,by=window.size)
if(max(my.breaks)<max.end){ my.breaks<-c(my.breaks,max.end)}
current.chr.table<-hist(xo[xo$chr==current.chr,]$end,breaks = my.breaks,plot=FALSE)
#prob.recombination<-current.chr.table$counts/sum(current.chr.table$counts)
prob.recombination<-
current.chr.table$counts/(2*num.individuals)
#pos<- 50 * log(1/(1-2*prob.recombination))
pos<-prob.recombination*100
for (i in 2:length(pos)) {
pos[i]=pos[i]+pos[i-1]
}
pos<-c(0,pos) #adding an start of the chromosome
chromo.keep<-c(chromo.keep,rep(current.chr,length(pos)))
cM.keep<-c(cM.keep,pos)
position.keep<-c(position.keep,my.breaks)
}
roughTeoData<-data.frame(chr=chromo.keep,pos=cM.keep,phys_pos=position.keep)
rownames(roughTeoData) <- paste0(as.character(roughTeoData$chr),"_",round(roughTeoData$phys_pos/1e6,digits=0),"Mb")
head(roughTeoData)
## chr pos phys_pos
## 1_0Mb 1 0.00000 0e+00
## 1_20Mb 1 38.35282 2e+07
## 1_40Mb 1 54.75027 4e+07
## 1_60Mb 1 66.67375 6e+07
## 1_80Mb 1 75.83422 8e+07
## 1_100Mb 1 81.41339 1e+08
tail(roughTeoData)
## chr pos phys_pos
## 10_60Mb 10 34.08077 60000000
## 10_80Mb 10 37.64081 80000000
## 10_100Mb 10 42.13603 100000000
## 10_120Mb 10 45.51541 120000000
## 10_140Mb 10 65.47290 140000000
## 10_150Mb 10 102.09352 150357239
teoMap <- table2map(roughTeoData)
plotMap(teoMap,show.marker.names=FALSE,main="Teo all")
for(my.chr in 1:10) {
par(mfrow=c(1,2))
plotMap(teoMap,chr=c(my.chr), show.marker.names=TRUE, main=paste0("Chromosome ",my.chr ," teosinte"))
plotMap(rough.ogut.map,chr=c(my.chr),show.marker.names=TRUE,main=paste0("Chromosome ",my.chr ," maize"))
}
max.per.chromosome.teo<-tapply(roughTeoData$pos,roughTeoData$chr,max)
max.per.chromosome.teo
## 1 2 3 4 5 6 7 8
## 202.4442 168.1296 143.8151 133.4644 145.7386 126.6738 132.2210 127.1095
## 9 10
## 111.4772 102.0935
total.cM.teosinte<-sum(max.per.chromosome.teo)
total.cM.teosinte
## [1] 1393.167
## former negative cM in Ogut's data have been adjusted to 0
## hence the final cM is larger here that in Ogut's
max.per.chromosome.maize<-tapply(roughOgutData$pos,roughOgutData$chr,max)
max.per.chromosome.maize
## 1 2 3 4 5 6 7 8 9 10
## 210.2 160.0 161.4 151.8 157.0 111.8 138.4 137.4 130.2 112.8
total.cM.Maize<-sum(max.per.chromosome.maize)
total.cM.Maize
## [1] 1471