Load libraries

library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(magrittr)

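Load Anne’s DESeq results. These lists include all genes.
Notes: adjusted p-values are recomputed here from the raw p-values with the FDR (Benjamini–Hochberg) method, and missing log2 fold changes are set to 0.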

# Read the teosinte and maize result tables (comma-separated, with a header
# row) and recompute the adjusted p-values from the raw `pvalue` column using
# the "fdr" method of p.adjust().
teo <- read.table("~/Desktop/teo_anne.csv", header = TRUE, sep = ",")
teo <- mutate(teo, padj = p.adjust(pvalue, method = "fdr"))

maize <- read.table("~/Desktop/maize_anne.csv", header = TRUE, sep = ",")
maize <- mutate(maize, padj = p.adjust(pvalue, method = "fdr"))

# Join the two tables on `gene`; columns present in both get a ".teo" or
# ".maize" suffix. Only genes found in both tables are kept (merge default).
anne <- merge(teo, maize, by = "gene", suffixes = c(".teo", ".maize"))

# Replace missing log2 fold changes with 0 in both species.
anne <- mutate(
  anne,
  log2FoldChange.maize = ifelse(
    is.na(log2FoldChange.maize), 0, log2FoldChange.maize
  ),
  log2FoldChange.teo = ifelse(
    is.na(log2FoldChange.teo), 0, log2FoldChange.teo
  )
)

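Load Zak’s allele-specific expression results and combine them with Anne’s. We lose some genes – down to ~25K – because only genes present in both tables are kept.
Notes: zeros in ParentMaize.Leaf and ParentTeosinte.Leaf are replaced with 1, presumably so that the maize/teosinte log ratio used below is always defined.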

# Read Zak's table and replace zeros in the two parental leaf columns with 1.
# NOTE(review): presumably this keeps the maize/teosinte ratio taken further
# down finite -- confirm that 1 is the intended substitute value.
zak <- read.csv("~/Desktop/DE_analaysis.csv", header = TRUE)
zak <- mutate(
  zak,
  ParentMaize.Leaf = ifelse(ParentMaize.Leaf == 0, 1, ParentMaize.Leaf),
  ParentTeosinte.Leaf = ifelse(
    ParentTeosinte.Leaf == 0, 1, ParentTeosinte.Leaf
  )
)

# Combine with Anne's results, matching `gene` (anne) against `Gene` (zak).
# merge() keeps only the genes that occur in both tables.
zam <- merge(anne, zak, by.x = "gene", by.y = "Gene")

How many teosinte DE genes?

# Number of genes differentially expressed in teosinte (padj.teo <= 0.05).
# The cutoff is inclusive so that this count uses exactly the same criterion
# as every other filter in this analysis, including the final gene list.
# filter() drops rows whose condition is NA, so genes without an adjusted
# p-value are not counted.
nrow(filter(zam, padj.teo <= 0.05))
## [1] 135

How many of those are not DE in maize?

# Count genes that pass the teosinte cutoff (padj.teo <= 0.05) but not the
# maize one (padj.maize > 0.05). Rows where either test is NA are dropped.
zam %>%
  filter(padj.teo <= 0.05, padj.maize > 0.05) %>%
  nrow()
## [1] 105

And how many of those change in the same direction in maize vs. teosinte (from Lemmon’s data) as they do in modern vs. Holocene conditions (from our data)?

# Count teosinte-only DE genes whose teosinte log2 fold change has the same
# sign as log2(ParentMaize.Leaf / ParentTeosinte.Leaf): the product of the two
# is positive only when both are non-zero and point the same way.
zam %>%
  filter(
    padj.teo <= 0.05,
    padj.maize > 0.05,
    log2FoldChange.teo * log2(ParentMaize.Leaf / ParentTeosinte.Leaf) > 0
  ) %>%
  nrow()
## [1] 47

Let’s list those

# Candidate set: padj.teo <= 0.05, padj.maize > 0.05, and a teosinte log2 fold
# change with the same sign as log2(ParentMaize.Leaf / ParentTeosinte.Leaf).
zam.candidates <- filter(
  zam,
  padj.teo <= 0.05 & padj.maize > 0.05,
  log2FoldChange.teo * log2(ParentMaize.Leaf / ParentTeosinte.Leaf) > 0
)

# Print the gene identifiers of the candidates.
zam.candidates$gene
##  [1] GRMZM2G003765 GRMZM2G016705 GRMZM2G025409 GRMZM2G026672 GRMZM2G029912
##  [6] GRMZM2G032190 GRMZM2G034260 GRMZM2G051613 GRMZM2G058655 GRMZM2G060554
## [11] GRMZM2G065800 GRMZM2G066516 GRMZM2G068947 GRMZM2G070312 GRMZM2G071119
## [16] GRMZM2G073814 GRMZM2G073826 GRMZM2G077655 GRMZM2G080530 GRMZM2G081519
## [21] GRMZM2G083402 GRMZM2G083418 GRMZM2G090230 GRMZM2G101412 GRMZM2G117388
## [26] GRMZM2G121228 GRMZM2G128518 GRMZM2G130912 GRMZM2G131329 GRMZM2G134471
## [31] GRMZM2G135400 GRMZM2G142964 GRMZM2G149145 GRMZM2G150950 GRMZM2G155285
## [36] GRMZM2G160046 GRMZM2G168228 GRMZM2G169481 GRMZM2G302074 GRMZM2G303465
## [41] GRMZM2G314692 GRMZM2G318843 GRMZM2G322661 GRMZM2G428197 GRMZM2G458538
## [46] GRMZM2G482290 GRMZM5G891990
## 39475 Levels: AC148152.3_FG001 AC148152.3_FG005 ... GRMZM6G998221

How many of these are also domestication genes?

# Number of candidates flagged as domestication genes. Rows with a missing
# flag are not counted.
sum(zam.candidates$Domestication == TRUE, na.rm = TRUE)
## [1] 3

Let’s list those too

# Gene identifiers of the candidates flagged as domestication genes.
with(filter(zam.candidates, Domestication == TRUE), gene)
## [1] GRMZM2G121228 GRMZM2G303465 GRMZM2G314692
## 39475 Levels: AC148152.3_FG001 AC148152.3_FG005 ... GRMZM6G998221

What fraction of random gene samples of the same size (47 genes) contain more than 3 domestication genes?

# Resampling check: draw random gene sets of the same size as the candidate
# set from all genes in `zam` and report the fraction of draws that contain
# strictly more domestication genes than the candidate set does.
#
# The sample size and the observed count are taken from `zam.candidates`
# instead of being typed in by hand, so they cannot drift out of sync with the
# filters that define the candidates. local() keeps the helper variables out
# of the global environment; the value of its last expression is printed.
#
# NOTE(review): ties are excluded (`>`); a conventional one-sided empirical
# p-value would count draws with `>=` the observed number -- confirm intent.
# NOTE(review): no RNG seed is set, so the result varies between runs.
local({
  n.candidates <- nrow(zam.candidates)
  n.observed <- sum(zam.candidates$Domestication == TRUE, na.rm = TRUE)
  n.draws <- 1000

  exceeds <- replicate(
    n.draws,
    sum(sample(zam$Domestication, n.candidates)) > n.observed
  )
  mean(exceeds)
})
## [1] 0.161

So we have 47 potential assimilation loci and 3 awesome candidates that also show selection.

Finally, because I redid adjusted p-values, here’s the list of DE genes in teosinte:

# Gene identifiers of all genes with padj.teo <= 0.05. which() skips genes
# whose adjusted p-value is NA.
zam$gene[which(zam$padj.teo <= 0.05)]
##   [1] AC205608.4_FG004 AC235546.1_FG002 GRMZM2G002131    GRMZM2G002642   
##   [5] GRMZM2G003765    GRMZM2G005233    GRMZM2G016705    GRMZM2G018649   
##   [9] GRMZM2G021233    GRMZM2G025248    GRMZM2G025409    GRMZM2G026490   
##  [13] GRMZM2G026672    GRMZM2G029912    GRMZM2G030125    GRMZM2G030809   
##  [17] GRMZM2G031177    GRMZM2G032190    GRMZM2G034260    GRMZM2G037064   
##  [21] GRMZM2G039173    GRMZM2G039325    GRMZM2G039880    GRMZM2G041344   
##  [25] GRMZM2G051613    GRMZM2G052279    GRMZM2G056582    GRMZM2G058655   
##  [29] GRMZM2G059021    GRMZM2G059574    GRMZM2G060554    GRMZM2G061492   
##  [33] GRMZM2G065800    GRMZM2G066489    GRMZM2G066516    GRMZM2G066997   
##  [37] GRMZM2G067426    GRMZM2G068193    GRMZM2G068947    GRMZM2G069528   
##  [41] GRMZM2G069542    GRMZM2G070312    GRMZM2G071119    GRMZM2G073814   
##  [45] GRMZM2G073826    GRMZM2G074631    GRMZM2G074914    GRMZM2G077655   
##  [49] GRMZM2G079484    GRMZM2G080516    GRMZM2G080530    GRMZM2G081519   
##  [53] GRMZM2G081626    GRMZM2G083176    GRMZM2G083402    GRMZM2G083418   
##  [57] GRMZM2G090230    GRMZM2G092877    GRMZM2G095964    GRMZM2G099678   
##  [61] GRMZM2G101412    GRMZM2G103342    GRMZM2G104237    GRMZM2G107737   
##  [65] GRMZM2G107774    GRMZM2G115901    GRMZM2G117388    GRMZM2G117836   
##  [69] GRMZM2G118637    GRMZM2G121228    GRMZM2G121516    GRMZM2G121937   
##  [73] GRMZM2G122873    GRMZM2G127844    GRMZM2G128518    GRMZM2G130528   
##  [77] GRMZM2G130868    GRMZM2G130912    GRMZM2G131245    GRMZM2G131329   
##  [81] GRMZM2G134471    GRMZM2G135400    GRMZM2G138888    GRMZM2G141526   
##  [85] GRMZM2G142964    GRMZM2G143522    GRMZM2G149145    GRMZM2G150950   
##  [89] GRMZM2G151087    GRMZM2G151299    GRMZM2G151975    GRMZM2G153877   
##  [93] GRMZM2G155285    GRMZM2G159559    GRMZM2G160046    GRMZM2G161040   
##  [97] GRMZM2G161902    GRMZM2G164175    GRMZM2G164649    GRMZM2G168228   
## [101] GRMZM2G169481    GRMZM2G169569    GRMZM2G171677    GRMZM2G173654   
## [105] GRMZM2G176217    GRMZM2G300841    GRMZM2G302074    GRMZM2G303465   
## [109] GRMZM2G314692    GRMZM2G318843    GRMZM2G320591    GRMZM2G322661   
## [113] GRMZM2G341309    GRMZM2G342907    GRMZM2G359333    GRMZM2G375504   
## [117] GRMZM2G409224    GRMZM2G419844    GRMZM2G426953    GRMZM2G428197   
## [121] GRMZM2G430685    GRMZM2G436084    GRMZM2G458538    GRMZM2G461936   
## [125] GRMZM2G464676    GRMZM2G478779    GRMZM2G482290    GRMZM2G540772   
## [129] GRMZM5G805387    GRMZM5G807267    GRMZM5G822970    GRMZM5G843914   
## [133] GRMZM5G877773    GRMZM5G878008    GRMZM5G891990   
## 39475 Levels: AC148152.3_FG001 AC148152.3_FG005 ... GRMZM6G998221