Reading teosinte data
library(tidyverse)
library(cowplot)
library(qtl)
teosinte.xo<-read.table(file="xo_ZeaGBSv27raw_RareAllelesC2TeoCurated_depth_AGPv4_filtered0210.txt",header=TRUE,stringsAsFactors=FALSE)
head(teosinte.xo)
## taxon chr start end
## 1 A0151 1 337863 369777
## 2 A0415 1 337863 369777
## 3 A1190 1 337863 369777
## 4 A2718 1 337863 369777
## 5 A0134 1 337863 1035713
## 6 A0557 1 334927 1035713
teosinte.parentage<-read.table(file="Parentage_for_imputeR.csv",header=TRUE,stringsAsFactors=FALSE,sep=",")
head(teosinte.parentage)
## Taxa Mother Father
## 1 A0003 PC_N14_ID2 PC_N14_ID2
## 2 A0004 PC_J48_ID2 PC_J48_ID2
## 3 A0008 PC_J13_ID1 PC_K60_ID1
## 4 A0013 PC_N10_ID2 PC_N10_ID2
## 5 A0014 PC_K60_ID1 PC_K60_ID1
## 6 A0015 PC_J10_ID1 PC_M15_ID1
Transforming teosinte data
teosinte.merged.xo<-merge(teosinte.xo,teosinte.parentage,by.x="taxon",by.y="Taxa")
teosinte.child.counts<- teosinte.parentage %>%
group_by(Mother) %>%
summarise(family_progeny_count = length(Mother))
teosinte.selfed.counts<-teosinte.parentage[teosinte.parentage$Mother==teosinte.parentage$Father,] %>%
group_by(Mother) %>%
summarise(family_selfed_count = length(Mother))
teosinte.xo.by.mom<- teosinte.merged.xo %>%
group_by(Mother) %>%
summarise(family_xo_count = length(Mother))
teosinte.merged.xo<-inner_join(teosinte.merged.xo,teosinte.child.counts)
teosinte.merged.xo<-inner_join(teosinte.merged.xo,teosinte.selfed.counts)
teosinte.merged.xo<-inner_join(teosinte.merged.xo,teosinte.xo.by.mom)
#head(teosinte.merged.xo)
teosinte.merged.moms<-inner_join(teosinte.xo.by.mom,teosinte.child.counts)
dim(teosinte.merged.moms)
## [1] 51 3
teosinte.merged.moms<-inner_join(teosinte.merged.moms,teosinte.selfed.counts)
head(teosinte.merged.moms)
## # A tibble: 6 × 4
## Mother family_xo_count family_progeny_count family_selfed_count
## <chr> <int> <int> <int>
## 1 PC_I05_ID1 2791 104 22
## 2 PC_I06_ID1 1793 74 37
## 3 PC_I08_ID1 2241 83 39
## 4 PC_I11_ID2 3381 128 43
## 5 PC_I50_ID1 2431 91 12
## 6 PC_I50_ID2 2360 72 54
dim(teosinte.merged.moms)
## [1] 49 4
teosinte.xo.over.progeny.count<-data.frame(rep("teosinte",dim(teosinte.merged.moms)[1]),teosinte.merged.moms$family_xo_count/teosinte.merged.moms$family_progeny_count)
names(teosinte.xo.over.progeny.count)<-c("teo.or.maize","xo_over_family_count")
head(teosinte.xo.over.progeny.count)
## teo.or.maize xo_over_family_count
## 1 teosinte 26.83654
## 2 teosinte 24.22973
## 3 teosinte 27.00000
## 4 teosinte 26.41406
## 5 teosinte 26.71429
## 6 teosinte 32.77778
Reading maize landraces data
landraces.parentage<-read.table(file="landrace_parentage_info.txt",header=TRUE,stringsAsFactors=FALSE,sep="\t")
head(landraces.parentage)
## proid parent1
## 1 13FL991195:250305857 164_8_Tuxpeno_El_Aguacatito_Mex_:250358054
## 2 14FL991792:250411065 164_8_Tuxpeno_El_Aguacatito_Mex_:250358054
## 3 14FL992565:250411841 164_8_Tuxpeno_El_Aguacatito_Mex_:250358054
## 4 14FL991871:250411166 164_8_Tuxpeno_El_Aguacatito_Mex_:250358054
## 5 14FL990555:250409701 164_8_Tuxpeno_El_Aguacatito_Mex_:250358054
## 6 14FL992680:250411969 164_8_Tuxpeno_El_Aguacatito_Mex_:250358054
## parent2 progeny parent_1 parent_2
## 1 164_8_Tuxpeno_El_Aguacatito_Mex_:250358054 1004 1 1
## 2 164_8_Tuxpeno_El_Aguacatito_Mex_:250358054 4014 1 1
## 3 164_8_Tuxpeno_El_Aguacatito_Mex_:250358054 4587 1 1
## 4 164_8_Tuxpeno_El_Aguacatito_Mex_:250358054 4082 1 1
## 5 164_8_Tuxpeno_El_Aguacatito_Mex_:250358054 3021 1 1
## 6 164_8_Tuxpeno_El_Aguacatito_Mex_:250358054 4682 1 1
## qq_uu nloci
## 1 4062.908 24864
## 2 4013.364 24799
## 3 3859.624 24625
## 4 3968.140 24755
## 5 3986.508 24618
## 6 3971.058 24849
landraces.xo<-read.table(file="xo_Combined_LR13_LR14_parents2_AGPv4_filter0219.txt",
header=TRUE,stringsAsFactors=FALSE)
head(landraces.xo)
## taxon chr start end
## 1 13FL990708:250295003 1 262738 1023028
## 2 13FL991485:250349616 1 262855 1030583
## 3 14FL991631:250410929 1 262855 1218925
## 4 14FL991263:250410523 1 1030583 1228056
## 5 14FL990731:250409914 1 262855 1508268
## 6 14FL991166:250410414 1 1228478 1508268
Recombination rate plot for teosinte,30 bins total
ggplot(teosinte.xo,aes(x=start))+geom_histogram(bins=500)+facet_wrap(~chr,ncol=2)

Recombination rate plot for maize landraces,30 bins total
ggplot(landraces.xo,aes(x=start))+geom_histogram(bins=500)+facet_wrap(~chr,ncol=2)

Per family xo divided by family count
dim(teosinte.xo.over.progeny.count)
## [1] 49 2
dim(landraces.xo.over.progeny.count)
## [1] 35 2
xo.over.progeny.count<-rbind(teosinte.xo.over.progeny.count,landraces.xo.over.progeny.count)
head(xo.over.progeny.count)
## teo.or.maize xo_over_family_count
## 1 teosinte 26.83654
## 2 teosinte 24.22973
## 3 teosinte 27.00000
## 4 teosinte 26.41406
## 5 teosinte 26.71429
## 6 teosinte 32.77778
tail(xo.over.progeny.count)
## teo.or.maize xo_over_family_count
## 79 landraces 28.52941
## 80 landraces 26.93750
## 81 landraces 29.87879
## 82 landraces 27.38636
## 83 landraces 27.65385
## 84 landraces 27.72727
boxplot(xo.over.progeny.count$xo_over_family_count ~xo.over.progeny.count$teo.or.maize,
main="Per family xo/progeny.count")

Filtering for family.selfed.count>20
teosinte.merged.moms<-teosinte.merged.moms[teosinte.merged.moms$family_selfed_count>20,]
teosinte.xo.over.progeny.count<-data.frame(rep("teosinte",dim(teosinte.merged.moms)[1]),teosinte.merged.moms$family_xo_count/teosinte.merged.moms$family_progeny_count)
names(teosinte.xo.over.progeny.count)<-c("teo.or.maize","xo_over_family_count")
landraces.merged.moms<-landraces.merged.moms[landraces.merged.moms$family_selfed_count>20,]
landraces.xo.over.progeny.count<-data.frame(rep("landraces",dim(landraces.merged.moms)[1]),landraces.merged.moms$family_xo_count/landraces.merged.moms$family_progeny_count)
names(landraces.xo.over.progeny.count)<-c("teo.or.maize","xo_over_family_count")
dim(teosinte.xo.over.progeny.count)
## [1] 41 2
dim(landraces.xo.over.progeny.count)
## [1] 24 2
And plot again
xo.over.progeny.count<-rbind(teosinte.xo.over.progeny.count,landraces.xo.over.progeny.count)
head(xo.over.progeny.count)
## teo.or.maize xo_over_family_count
## 1 teosinte 26.83654
## 2 teosinte 24.22973
## 3 teosinte 27.00000
## 4 teosinte 26.41406
## 5 teosinte 32.77778
## 6 teosinte 25.58559
tail(xo.over.progeny.count)
## teo.or.maize xo_over_family_count
## 60 landraces 28.52941
## 61 landraces 26.93750
## 62 landraces 29.87879
## 63 landraces 27.38636
## 64 landraces 27.65385
## 65 landraces 27.72727
boxplot(xo.over.progeny.count$xo_over_family_count ~xo.over.progeny.count$teo.or.maize,
main="Per family xo/progeny.count")

#to pinpoint outlier landrace families
landraces.xo.over.progeny.count<-data.frame(landraces.merged.moms$parent1,landraces.merged.moms$family_xo_count/landraces.merged.moms$family_progeny_count)
names(landraces.xo.over.progeny.count)[2]<-"xo_over_family_count"
landraces.xo.over.progeny.count[landraces.xo.over.progeny.count$xo_over_family_count>40,]
## [1] landraces.merged.moms.parent1 xo_over_family_count
## <0 rows> (or 0-length row.names)
offending_families<- as.character(landraces.xo.over.progeny.count[landraces.xo.over.progeny.count$xo_over_family_count>40,]$landraces.merged.moms.parent1)
offending_families
## character(0)
Remove outlier families and plot again
landraces.xo.over.progeny.count<-landraces.xo.over.progeny.count[landraces.xo.over.progeny.count$xo_over_family_count<40,]
landraces.xo.over.progeny.count<-data.frame(rep("landraces",dim(landraces.xo.over.progeny.count)[1]),landraces.xo.over.progeny.count$xo_over_family_count)
names(landraces.xo.over.progeny.count)<-c("teo.or.maize","xo_over_family_count")
xo.over.progeny.count<-rbind(teosinte.xo.over.progeny.count,landraces.xo.over.progeny.count)
boxplot(xo.over.progeny.count$xo_over_family_count ~xo.over.progeny.count$teo.or.maize,
main="Per family xo/progeny.count")

Filtering xo events, removing families with <20 selfed progeny and of the three landrace families with outlier xo/progeny.count
dim(teosinte.merged.xo)
## [1] 126736 9
dim(landraces.merged.xo)
## [1] 122206 9
#number of teosinte families
length(unique(teosinte.merged.xo$Mother))
## [1] 49
#number of landrace families
length(unique(landraces.merged.xo$parent1))
## [1] 35
teosinte.merged.xo<-teosinte.merged.xo[teosinte.merged.xo$family_selfed_count>20,]
landraces.merged.xo<-landraces.merged.xo[landraces.merged.xo$family_selfed_count>20
& !(landraces.merged.xo$parent1 %in% offending_families),]
dim(teosinte.merged.xo)
## [1] 106294 9
dim(landraces.merged.xo)
## [1] 93874 9
teosinte.moms<-unique(teosinte.merged.xo$Mother)
#number of teosinte families left after filtering
length(teosinte.moms)
## [1] 41
#number of landrace families left after filtering
landraces.moms<-unique(landraces.merged.xo$parent1)
length(landraces.moms)
## [1] 24
Plotting filtered data
Recombination rate plot for teosinte,30 bins total
ggplot(teosinte.merged.xo,aes(x=start))+geom_histogram(bins=500)+facet_wrap(~chr,ncol=2)

Recombination rate plot for maize landraces,30 bins total
ggplot(landraces.merged.xo,aes(x=start))+geom_histogram(bins=500)+facet_wrap(~chr,ncol=2)

And now actually plotting by family
Chr2, families 1:10
ggplot(data=filter(teosinte.merged.xo,chr==2,Mother %in% teosinte.moms[1:10],start>0),aes(x=start,y=(..density..)))+geom_freqpoly(mapping=aes(color=Mother),binwidth=1e7)

#+theme(legend.position="none")
Chr2, families 11:20
ggplot(data=filter(teosinte.merged.xo,chr==2,Mother %in% teosinte.moms[11:20],start>0 ),aes(x=start,y=..density..))+geom_freqpoly(mapping=aes(color=Mother),binwidth=1e7)

#+theme(legend.position="none")
Chr2, families 21:30
ggplot(data=filter(teosinte.merged.xo,chr==2,Mother %in% teosinte.moms[21:30],start>0 ),aes(x=start,y=..density..))+geom_freqpoly(mapping=aes(color=Mother),binwidth=1e7)

Chr2, families 31:41
ggplot(data=filter(teosinte.merged.xo,chr==2,Mother %in% teosinte.moms[31:40],start>0 ),aes(x=start,y=..density..))+geom_freqpoly(mapping=aes(color=Mother),binwidth=1e7)

Chr2, families ALL FAMILIES
ggplot(data=filter(teosinte.merged.xo,chr==2),aes(x=start,y=..density..))+geom_freqpoly(mapping=aes(color=Mother),alpha=1/4,binwidth=1e7,show.legend=FALSE)

ALL FAMILIES ALL CHROMOSOMES
ggplot(data=teosinte.merged.xo,aes(x=start,y=..density..))+geom_freqpoly(mapping=aes(color=Mother),alpha=1/4,binwidth=1e7,show.legend=FALSE)+facet_wrap(~chr,ncol=2,scales = "free")
