Load libraries
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(magrittr)
Load Anne’s DESeq results. These lists include all genes.
Notes:
# Read the teosinte and maize result tables and recompute the adjusted
# p-values from the raw `pvalue` column with the "fdr" method of p.adjust().
# mutate() creates `padj`, or overwrites it if the file already has one.
teo <- read.table("~/Desktop/teo_anne.csv", header = TRUE, sep = ",") %>%
  mutate(padj = p.adjust(pvalue, method = "fdr"))
maize <- read.table("~/Desktop/maize_anne.csv", header = TRUE, sep = ",") %>%
  mutate(padj = p.adjust(pvalue, method = "fdr"))
# Join the two tables on `gene`; columns present in both get a ".teo" or
# ".maize" suffix. Missing log2 fold changes are then set to 0 so that later
# arithmetic on them does not yield NA.
anne <- merge(teo, maize, by = "gene", suffixes = c(".teo", ".maize")) %>%
  mutate(
    log2FoldChange.maize = ifelse(
      is.na(log2FoldChange.maize), 0, log2FoldChange.maize
    ),
    log2FoldChange.teo = ifelse(
      is.na(log2FoldChange.teo), 0, log2FoldChange.teo
    )
  )
Load Zak’s allele specific expression results and combine with Anne’s. We lose some genes – down to ~25K.
Notes:
# Read the allele-specific expression table. Zero values in the two parental
# leaf columns are replaced with 1 -- presumably so that the maize/teosinte
# ratio and its log2, computed further down, stay finite (TODO confirm).
zak <- read.csv("~/Desktop/DE_analaysis.csv", header = TRUE) %>%
  mutate(
    ParentMaize.Leaf = ifelse(ParentMaize.Leaf == 0, 1, ParentMaize.Leaf),
    ParentTeosinte.Leaf = ifelse(
      ParentTeosinte.Leaf == 0, 1, ParentTeosinte.Leaf
    )
  )
# merge() defaults to an inner join, so only genes present in both tables
# are kept (`gene` in anne, `Gene` in zak).
zam <- merge(anne, zak, by.x = "gene", by.y = "Gene")
How many teosinte DE genes?
# Number of genes with a teosinte adjusted p-value at or below 0.05.
# Uses `<=` so the cutoff matches every other padj.teo filter in this
# analysis; filter() drops rows where the condition is NA.
nrow(filter(zam, padj.teo <= 0.05))
## [1] 135
How many of those are not DE in maize?
# Number of genes significant in teosinte (padj <= 0.05) but not in maize
# (padj > 0.05).
zam %>%
  filter(padj.teo <= 0.05, padj.maize > 0.05) %>%
  nrow()
## [1] 105
And how many of those show expression changes in the same direction in maize vs. teosinte (from Lemmon’s data) as in modern vs. Holocene (from our data)?
# Of the teosinte-only DE genes, count those whose teosinte log2 fold change
# has the same sign as log2(ParentMaize.Leaf / ParentTeosinte.Leaf): the
# product of the two is positive only when both signs agree.
zam %>%
  filter(
    padj.teo <= 0.05,
    padj.maize > 0.05,
    log2FoldChange.teo * log2(ParentMaize.Leaf / ParentTeosinte.Leaf) > 0
  ) %>%
  nrow()
## [1] 47
Let’s list those
# Candidate set: significant in teosinte only, with the teosinte log2 fold
# change and the parental maize/teosinte leaf log2 ratio sharing a sign.
zam.candidates <- zam %>%
  filter(padj.teo <= 0.05, padj.maize > 0.05) %>%
  filter(
    log2FoldChange.teo * log2(ParentMaize.Leaf / ParentTeosinte.Leaf) > 0
  )
# Print the gene identifiers of the candidates.
zam.candidates[["gene"]]
## [1] GRMZM2G003765 GRMZM2G016705 GRMZM2G025409 GRMZM2G026672 GRMZM2G029912
## [6] GRMZM2G032190 GRMZM2G034260 GRMZM2G051613 GRMZM2G058655 GRMZM2G060554
## [11] GRMZM2G065800 GRMZM2G066516 GRMZM2G068947 GRMZM2G070312 GRMZM2G071119
## [16] GRMZM2G073814 GRMZM2G073826 GRMZM2G077655 GRMZM2G080530 GRMZM2G081519
## [21] GRMZM2G083402 GRMZM2G083418 GRMZM2G090230 GRMZM2G101412 GRMZM2G117388
## [26] GRMZM2G121228 GRMZM2G128518 GRMZM2G130912 GRMZM2G131329 GRMZM2G134471
## [31] GRMZM2G135400 GRMZM2G142964 GRMZM2G149145 GRMZM2G150950 GRMZM2G155285
## [36] GRMZM2G160046 GRMZM2G168228 GRMZM2G169481 GRMZM2G302074 GRMZM2G303465
## [41] GRMZM2G314692 GRMZM2G318843 GRMZM2G322661 GRMZM2G428197 GRMZM2G458538
## [46] GRMZM2G482290 GRMZM5G891990
## 39475 Levels: AC148152.3_FG001 AC148152.3_FG005 ... GRMZM6G998221
How many are also domestication genes?
# Number of candidates flagged as domestication genes.
zam.candidates %>%
  filter(Domestication == TRUE) %>%
  nrow()
## [1] 3
Let’s list those too
# Gene identifiers of the candidates flagged as domestication genes.
# which() drops NA flags, matching how filter() treats them.
zam.candidates$gene[which(zam.candidates$Domestication == TRUE)]
## [1] GRMZM2G121228 GRMZM2G303465 GRMZM2G314692
## 39475 Levels: AC148152.3_FG001 AC148152.3_FG005 ... GRMZM6G998221
What fraction of random samples of the same size (47 genes) contain more than 3 domestication genes?
# Resampling check: draw gene sets the same size as the candidate list from
# all genes in `zam` and report the fraction of draws that contain strictly
# more domestication genes than the candidates do.
# The sample size and the observed count are taken from `zam.candidates`
# rather than hard-coded, so they stay correct if the upstream filters change.
# NOTE(review): no seed is set, so the estimate varies slightly between runs.
n_candidates <- nrow(zam.candidates)
n_observed <- sum(zam.candidates$Domestication == TRUE, na.rm = TRUE)
n_draws <- 1000
mean(vapply(
  seq_len(n_draws),
  function(i) sum(sample(zam$Domestication, n_candidates)) > n_observed,
  logical(1)
))
## [1] 0.161
So we have 47 potential assimilation loci and 3 awesome candidates that also show selection.
Finally, because I redid adjusted p-values, here’s the list of DE genes in teosinte:
# Gene identifiers with a teosinte adjusted p-value at or below 0.05.
zam %>%
  filter(padj.teo <= 0.05) %>%
  .[["gene"]]
## [1] AC205608.4_FG004 AC235546.1_FG002 GRMZM2G002131 GRMZM2G002642
## [5] GRMZM2G003765 GRMZM2G005233 GRMZM2G016705 GRMZM2G018649
## [9] GRMZM2G021233 GRMZM2G025248 GRMZM2G025409 GRMZM2G026490
## [13] GRMZM2G026672 GRMZM2G029912 GRMZM2G030125 GRMZM2G030809
## [17] GRMZM2G031177 GRMZM2G032190 GRMZM2G034260 GRMZM2G037064
## [21] GRMZM2G039173 GRMZM2G039325 GRMZM2G039880 GRMZM2G041344
## [25] GRMZM2G051613 GRMZM2G052279 GRMZM2G056582 GRMZM2G058655
## [29] GRMZM2G059021 GRMZM2G059574 GRMZM2G060554 GRMZM2G061492
## [33] GRMZM2G065800 GRMZM2G066489 GRMZM2G066516 GRMZM2G066997
## [37] GRMZM2G067426 GRMZM2G068193 GRMZM2G068947 GRMZM2G069528
## [41] GRMZM2G069542 GRMZM2G070312 GRMZM2G071119 GRMZM2G073814
## [45] GRMZM2G073826 GRMZM2G074631 GRMZM2G074914 GRMZM2G077655
## [49] GRMZM2G079484 GRMZM2G080516 GRMZM2G080530 GRMZM2G081519
## [53] GRMZM2G081626 GRMZM2G083176 GRMZM2G083402 GRMZM2G083418
## [57] GRMZM2G090230 GRMZM2G092877 GRMZM2G095964 GRMZM2G099678
## [61] GRMZM2G101412 GRMZM2G103342 GRMZM2G104237 GRMZM2G107737
## [65] GRMZM2G107774 GRMZM2G115901 GRMZM2G117388 GRMZM2G117836
## [69] GRMZM2G118637 GRMZM2G121228 GRMZM2G121516 GRMZM2G121937
## [73] GRMZM2G122873 GRMZM2G127844 GRMZM2G128518 GRMZM2G130528
## [77] GRMZM2G130868 GRMZM2G130912 GRMZM2G131245 GRMZM2G131329
## [81] GRMZM2G134471 GRMZM2G135400 GRMZM2G138888 GRMZM2G141526
## [85] GRMZM2G142964 GRMZM2G143522 GRMZM2G149145 GRMZM2G150950
## [89] GRMZM2G151087 GRMZM2G151299 GRMZM2G151975 GRMZM2G153877
## [93] GRMZM2G155285 GRMZM2G159559 GRMZM2G160046 GRMZM2G161040
## [97] GRMZM2G161902 GRMZM2G164175 GRMZM2G164649 GRMZM2G168228
## [101] GRMZM2G169481 GRMZM2G169569 GRMZM2G171677 GRMZM2G173654
## [105] GRMZM2G176217 GRMZM2G300841 GRMZM2G302074 GRMZM2G303465
## [109] GRMZM2G314692 GRMZM2G318843 GRMZM2G320591 GRMZM2G322661
## [113] GRMZM2G341309 GRMZM2G342907 GRMZM2G359333 GRMZM2G375504
## [117] GRMZM2G409224 GRMZM2G419844 GRMZM2G426953 GRMZM2G428197
## [121] GRMZM2G430685 GRMZM2G436084 GRMZM2G458538 GRMZM2G461936
## [125] GRMZM2G464676 GRMZM2G478779 GRMZM2G482290 GRMZM2G540772
## [129] GRMZM5G805387 GRMZM5G807267 GRMZM5G822970 GRMZM5G843914
## [133] GRMZM5G877773 GRMZM5G878008 GRMZM5G891990
## 39475 Levels: AC148152.3_FG001 AC148152.3_FG005 ... GRMZM6G998221