Some recombination rate plots (Teosinte populations)

library(tidyverse)
library(cowplot)

xo<-read.table(file="xo_ZeaGBSv27raw_RareAllelesC2TeoCurated_depth_AGPv4_filtered0210.txt",header=TRUE,stringsAsFactors=FALSE)
head(xo)
##   taxon chr  start     end
## 1 A0151   1 337863  369777
## 2 A0415   1 337863  369777
## 3 A1190   1 337863  369777
## 4 A2718   1 337863  369777
## 5 A0134   1 337863 1035713
## 6 A0557   1 334927 1035713
parentage<-read.table(file="Parentage_for_imputeR.csv",header=TRUE,stringsAsFactors=FALSE,sep=",")
head(parentage)
##    Taxa     Mother     Father
## 1 A0003 PC_N14_ID2 PC_N14_ID2
## 2 A0004 PC_J48_ID2 PC_J48_ID2
## 3 A0008 PC_J13_ID1 PC_K60_ID1
## 4 A0013 PC_N10_ID2 PC_N10_ID2
## 5 A0014 PC_K60_ID1 PC_K60_ID1
## 6 A0015 PC_J10_ID1 PC_M15_ID1
moms<-as.character(levels(as.factor(parentage$Mother)))
length(moms)
## [1] 51
 plot.chr1<-ggplot(filter(xo,chr==1),aes(x=start))+geom_histogram()
 ggdraw()+draw_plot(plot.chr1)+draw_plot_label("Chr1", size = 15)

10kb bins Chr1

1mb bins Chr1

All 10 chromosomes, histogram, 30 bins total

ggplot(xo,aes(x=start))+geom_histogram(bins=500)+facet_wrap(~chr,ncol=2)

50kb bins Chr6

Transforming the data for plotting by family

#library(plyr)

merged.xo<-merge(xo,parentage,by.x="taxon",by.y="Taxa")
dim(xo)
## [1] 127546      4
dim(merged.xo)
## [1] 127330      6
dim(xo[xo$taxon%in% parentage$Taxa,])
## [1] 127330      4
dim(xo[!xo$taxon%in% parentage$Taxa,])
## [1] 216   4
head(xo[!xo$taxon%in% parentage$Taxa,])
##           taxon chr    start      end
## 74   PC_N10_ID2   1  1228056  1817331
## 2647 PC_N14_ID2   1  8308456 11128682
## 3736 PC_O51_ID2   1 22270664 22423563
## 3754 PC_O51_ID2   1 22611838 22739284
## 4001 PC_N07_ID2   1  5434296 26043961
## 4435 PC_N14_ID2   1 21692141 32152482
child.counts<- parentage %>% 
  group_by(Mother) %>%
  summarise(progeny_count = length(Mother))

 
merged.xo<-inner_join(merged.xo,child.counts)
## Joining, by = "Mother"
head(merged.xo)
##   taxon chr     start       end     Mother     Father progeny_count
## 1 A0003   2  29540794  29626929 PC_N14_ID2 PC_N14_ID2           100
## 2 A0003   3 142168385 142170876 PC_N14_ID2 PC_N14_ID2           100
## 3 A0003   8 172140327 172152248 PC_N14_ID2 PC_N14_ID2           100
## 4 A0003   1 303657036 303767010 PC_N14_ID2 PC_N14_ID2           100
## 5 A0003   3 207623726 207757893 PC_N14_ID2 PC_N14_ID2           100
## 6 A0003   2   2808457   2990328 PC_N14_ID2 PC_N14_ID2           100
 plot.chr6<-ggplot(data=filter(merged.xo,chr==6),aes(x=start))+geom_freqpoly(binwidth=5e4)
  ggdraw()+draw_plot(plot.chr6)+draw_plot_label("Chr6", size = 15)

And now actually plotting by family

Chr2, families 1:10

ggplot(data=filter(merged.xo,chr==2,Mother %in% moms[1:10],start>0),aes(x=start,y=(..density..)))+geom_freqpoly(mapping=aes(color=Mother),binwidth=1e7)

#+theme(legend.position="none")

Chr2, families 11:20

 ggplot(data=filter(merged.xo,chr==2,Mother %in% moms[11:20],start>0 ),aes(x=start,y=..density..))+geom_freqpoly(mapping=aes(color=Mother),binwidth=1e7)

#+theme(legend.position="none")

Chr2, families 21:30

 ggplot(data=filter(merged.xo,chr==2,Mother %in% moms[21:30],start>0 ),aes(x=start,y=..density..))+geom_freqpoly(mapping=aes(color=Mother),binwidth=1e7)

Chr2, families 31:40

 ggplot(data=filter(merged.xo,chr==2,Mother %in% moms[31:40],start>0 ),aes(x=start,y=..density..))+geom_freqpoly(mapping=aes(color=Mother),binwidth=1e7)

Chr2, families 41:51

 ggplot(data=filter(merged.xo,chr==2,Mother %in% moms[41:51],start>0 ),aes(x=start,y=..density..))+geom_freqpoly(mapping=aes(color=Mother),binwidth=1e7)

Chr2, families ALL FAMILIES

 ggplot(data=filter(merged.xo,chr==2),aes(x=start,y=..density..))+geom_freqpoly(mapping=aes(color=Mother),alpha=1/4,binwidth=1e7,show.legend=FALSE)

ALL FAMILIES ALL CHROMOSOMES

 ggplot(data=merged.xo,aes(x=start,y=..density..))+geom_freqpoly(mapping=aes(color=Mother),alpha=1/4,binwidth=1e7,show.legend=FALSE)+facet_wrap(~chr,ncol=2,scales = "free")

Report of mean xo per family

teosinte.child.counts<- parentage %>% 
  group_by(Mother) %>%
  summarise(progeny_count = length(Mother))

teosinte.selfed.counts<-parentage[parentage$Mother==parentage$Father,] %>%
   group_by(Mother) %>%
  summarise(progeny_count = length(Mother))

 teosinte.xo.by.mom<- merged.xo %>% 
  group_by(Mother) %>%
  summarise(xo_count = length(Mother))
 


teosinte.merged.moms<-inner_join(teosinte.xo.by.mom,teosinte.child.counts)

head(teosinte.merged.moms)
## # A tibble: 6 × 3
##       Mother xo_count progeny_count
##        <chr>    <int>         <int>
## 1 PC_I05_ID1     2791           104
## 2 PC_I06_ID1     1793            74
## 3 PC_I08_ID1     2241            83
## 4 PC_I11_ID2     3381           128
## 5 PC_I50_ID1     2431            91
## 6 PC_I50_ID2     2360            72
teosinte.xo.over.progeny.count<-data.frame(rep("teosinte",dim(teosinte.merged.moms)[1]),teosinte.merged.moms$xo_count/teosinte.merged.moms$progeny_count)
names(teosinte.xo.over.progeny.count)<-c("teo.or.maize","xo_over_family_count")



## And for maize landraces
landraces.parentage<-read.table(file="landrace_parentage_info.txt",header=TRUE,stringsAsFactors=FALSE,sep="\t")
head(landraces.parentage)
##                  proid                                    parent1
## 1 13FL991195:250305857 164_8_Tuxpeno_El_Aguacatito_Mex_:250358054
## 2 14FL991792:250411065 164_8_Tuxpeno_El_Aguacatito_Mex_:250358054
## 3 14FL992565:250411841 164_8_Tuxpeno_El_Aguacatito_Mex_:250358054
## 4 14FL991871:250411166 164_8_Tuxpeno_El_Aguacatito_Mex_:250358054
## 5 14FL990555:250409701 164_8_Tuxpeno_El_Aguacatito_Mex_:250358054
## 6 14FL992680:250411969 164_8_Tuxpeno_El_Aguacatito_Mex_:250358054
##                                      parent2 progeny parent_1 parent_2
## 1 164_8_Tuxpeno_El_Aguacatito_Mex_:250358054    1004        1        1
## 2 164_8_Tuxpeno_El_Aguacatito_Mex_:250358054    4014        1        1
## 3 164_8_Tuxpeno_El_Aguacatito_Mex_:250358054    4587        1        1
## 4 164_8_Tuxpeno_El_Aguacatito_Mex_:250358054    4082        1        1
## 5 164_8_Tuxpeno_El_Aguacatito_Mex_:250358054    3021        1        1
## 6 164_8_Tuxpeno_El_Aguacatito_Mex_:250358054    4682        1        1
##      qq_uu nloci
## 1 4062.908 24864
## 2 4013.364 24799
## 3 3859.624 24625
## 4 3968.140 24755
## 5 3986.508 24618
## 6 3971.058 24849
landraces.xo<-read.table(file="xo_Combined_LR13_LR14_parents2_AGPv4_filter1222.txt",
               header=TRUE,stringsAsFactors=FALSE)
head(landraces.xo)
##                  taxon chr  start     end
## 1 13FL990708:250295003   1 262738 1023028
## 2 13FL991001:250307264   1 560842 1211062
## 3 13FL991580:250306177   1 560842 1211062
## 4 13FL992495:250308398   1 560842 1211062
## 5 13FL992927:250426784   1 560842 1211062
## 6 14FL991065:250410291   1 560842 1211062
merged.landraces.xo<-merge(landraces.xo,landraces.parentage,by.x="taxon",by.y="proid")

landraces.child.counts<- landraces.parentage %>% 
  group_by(parent1) %>%
  summarise(progeny_count = length(parent1))

head(landraces.child.counts)
## # A tibble: 6 × 2
##                                       parent1 progeny_count
##                                         <chr>         <int>
## 1  164_1_Tuxpeno_El_Aguacatito_Mex_:250426647           503
## 2 164_11_Tuxpeno_El_Aguacatito_Mex_:250358056           138
## 3 164_14_Tuxpeno_El_Aguacatito_Mex_:250426648           177
## 4  164_2_Tuxpeno_El_Aguacatito_Mex_:250358051           222
## 5  164_4_Tuxpeno_El_Aguacatito_Mex_:250358052           175
## 6  164_5_Tuxpeno_El_Aguacatito_Mex_:250358053             6
head(merged.landraces.xo)
##                  taxon chr     start       end
## 1 13FL990001:250304331   3   1878212   1930707
## 2 13FL990001:250304331   6 114787258 114878239
## 3 13FL990001:250304331   8 156195443 156534874
## 4 13FL990001:250304331  10   3801323   3855495
## 5 13FL990001:250304331   1   6427407   6526221
## 6 13FL990001:250304331   3 227592654 227714108
##                                      parent1
## 1 170_1_Tuxpeno_El_Aguacatito_Mex_:250358087
## 2 170_1_Tuxpeno_El_Aguacatito_Mex_:250358087
## 3 170_1_Tuxpeno_El_Aguacatito_Mex_:250358087
## 4 170_1_Tuxpeno_El_Aguacatito_Mex_:250358087
## 5 170_1_Tuxpeno_El_Aguacatito_Mex_:250358087
## 6 170_1_Tuxpeno_El_Aguacatito_Mex_:250358087
##                                       parent2 progeny parent_1 parent_2
## 1 171_11_Tuxpeno_El_Aguacatito_Mex_:250358103       1       29       30
## 2 171_11_Tuxpeno_El_Aguacatito_Mex_:250358103       1       29       30
## 3 171_11_Tuxpeno_El_Aguacatito_Mex_:250358103       1       29       30
## 4 171_11_Tuxpeno_El_Aguacatito_Mex_:250358103       1       29       30
## 5 171_11_Tuxpeno_El_Aguacatito_Mex_:250358103       1       29       30
## 6 171_11_Tuxpeno_El_Aguacatito_Mex_:250358103       1       29       30
##      qq_uu nloci
## 1 2608.265 24692
## 2 2608.265 24692
## 3 2608.265 24692
## 4 2608.265 24692
## 5 2608.265 24692
## 6 2608.265 24692
landraces.xo.by.mom<- merged.landraces.xo %>% 
  group_by(parent1) %>%
  summarise(xo_count = length(parent1))

head(landraces.xo.by.mom)
## # A tibble: 6 × 2
##                                       parent1 xo_count
##                                         <chr>    <int>
## 1  164_1_Tuxpeno_El_Aguacatito_Mex_:250426647    16329
## 2 164_11_Tuxpeno_El_Aguacatito_Mex_:250358056     5336
## 3 164_14_Tuxpeno_El_Aguacatito_Mex_:250426648     5466
## 4  164_2_Tuxpeno_El_Aguacatito_Mex_:250358051     6510
## 5  164_4_Tuxpeno_El_Aguacatito_Mex_:250358052     5075
## 6  164_8_Tuxpeno_El_Aguacatito_Mex_:250358054     7743
landraces.merged.moms<-inner_join(landraces.xo.by.mom,landraces.child.counts)

head(landraces.merged.moms)
## # A tibble: 6 × 3
##                                       parent1 xo_count progeny_count
##                                         <chr>    <int>         <int>
## 1  164_1_Tuxpeno_El_Aguacatito_Mex_:250426647    16329           503
## 2 164_11_Tuxpeno_El_Aguacatito_Mex_:250358056     5336           138
## 3 164_14_Tuxpeno_El_Aguacatito_Mex_:250426648     5466           177
## 4  164_2_Tuxpeno_El_Aguacatito_Mex_:250358051     6510           222
## 5  164_4_Tuxpeno_El_Aguacatito_Mex_:250358052     5075           175
## 6  164_8_Tuxpeno_El_Aguacatito_Mex_:250358054     7743           254
landraces.xo.over.progeny.count<-data.frame(rep("landraces",dim(landraces.merged.moms)[1]),landraces.merged.moms$xo_count/landraces.merged.moms$progeny_count)
names(landraces.xo.over.progeny.count)<-c("teo.or.maize","xo_over_family_count")


xo.over.progeny.count<-rbind(teosinte.xo.over.progeny.count,landraces.xo.over.progeny.count)
head(xo.over.progeny.count)
##   teo.or.maize xo_over_family_count
## 1     teosinte             26.83654
## 2     teosinte             24.22973
## 3     teosinte             27.00000
## 4     teosinte             26.41406
## 5     teosinte             26.71429
## 6     teosinte             32.77778
tail(xo.over.progeny.count)
##    teo.or.maize xo_over_family_count
## 92    landraces             27.98214
## 93    landraces            118.60000
## 94    landraces             41.50303
## 95    landraces             33.51136
## 96    landraces             29.19231
## 97    landraces             28.00000
boxplot(xo.over.progeny.count$xo_over_family_count ~xo.over.progeny.count$teo.or.maize,
        main="Per family xo/progeny.count")

landraces.values<-xo.over.progeny.count[xo.over.progeny.count$teo.or.maize=="landraces",]$xo_over_family_count
median(landraces.values)
## [1] 29.88302
teosinte.values<-xo.over.progeny.count[xo.over.progeny.count$teo.or.maize=="teosinte",]$xo_over_family_count
median(teosinte.values)
## [1] 26.67045
wilcox.test(landraces.values,teosinte.values)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  landraces.values and teosinte.values
## W = 2044, p-value = 3.195e-10
## alternative hypothesis: true location shift is not equal to 0