Load libraries
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Loading required package: ggplot2
##
## Attaching package: 'cowplot'
##
## The following object is masked from 'package:ggplot2':
##
## ggsave
Notes:
* I replace log2FoldChange with 0 for all genes with baseMean==0. These used to be marked NA but we want to include in permutations and note that they show no difference.
teo<-read.table("~/projects/expression_assimilation/teo_anne_newfdr.csv",header=T,sep=",") %>% set_names(c("gene",names(.)[2:7]))
maize<-read.table("~/projects/expression_assimilation/maize_anne_newfdr.csv",header=T,sep=",") %>% set_names(c("gene",names(.)[2:7]))
anne<-merge(teo,maize,by.x="gene",by.y="gene",suffixes=c(".teo",".maize")) %>%
mutate(log2FoldChange.maize=ifelse(is.na(log2FoldChange.maize),0,log2FoldChange.maize), log2FoldChange.teo=ifelse(is.na(log2FoldChange.teo),0,log2FoldChange.teo))
Notes:
zak<-read.csv("~/projects/expression_assimilation/DE_analaysis.csv",header=T) %>% mutate(ParentMaize.Leaf=ifelse(ParentMaize.Leaf==0,1,ParentMaize.Leaf),ParentTeosinte.Leaf=ifelse(ParentTeosinte.Leaf==0,1,ParentTeosinte.Leaf))
zam<-merge(anne,zak,by.x="gene",by.y="Gene",all.x=TRUE)
Let’s set some FDR values and effect size cutoff:
fdr.teo=0.1
fdr.maize=0.1
fdr.zak=0.1
effect.size.teo=0.5
## [1] 3766
## [1] 2603
What does this look like from whole distribution?
ggplot(data=zam,aes(color=and(zam$padj.teo<=fdr.teo,zam$padj.maize>fdr.maize)))+
geom_point(aes(y=log2FoldChange.teo,x=log2FoldChange.maize))+
xlab("400/265 maize") + ylab("400/265 teosinte")+background_grid(major = 'yx', minor = "xy")+
scale_color_discrete(name="Candidate")
A lot of the DE genes in teo are lowly expressed. Maybe subset to ones with large-ish changes, say greater than 50%.
ggplot(data=zam,aes(color=and(and(zam$padj.teo<=fdr.teo,zam$padj.maize>fdr.maize),or(zam$log2FoldChange.teo<=log2(1-effect.size.teo),zam$log2FoldChange.teo>log2(1+effect.size.teo)))))+
geom_point(aes(y=log2FoldChange.teo,x=log2FoldChange.maize))+
xlab("400/265 maize") + ylab("400/265 teosinte")+background_grid(major = 'yx', minor = "xy")+
scale_color_discrete(name="Candidate")
How many of these are also in the same direction in maize vs. teosinte (from Lemmon’s data)? We want teo \(\rightarrow\) maize to be same direction modern \(\rightarrow\) holocene. For example, if teo in Holocene is 10 and teo in Modern is 20, we want maize < teosinte. Check code and direction of log effects below.
## [1] 539
And how many are statistically significantly different in Lemmon’s data? Let’s list them.
## [1] 298
## [1] AC192451.3_FG001 AC204711.3_FG002 AC205703.4_FG002 AC209206.3_FG011
## [5] AC210013.4_FG009 AC214602.3_FG012 AC233945.1_FG003 AC234185.1_FG004
## [9] GRMZM2G000076 GRMZM2G000264 GRMZM2G000818 GRMZM2G001660
## [13] GRMZM2G003331 GRMZM2G004528 GRMZM2G004699 GRMZM2G004990
## [17] GRMZM2G005082 GRMZM2G005834 GRMZM2G007201 GRMZM2G007256
## [21] GRMZM2G008108 GRMZM2G009282 GRMZM2G009591 GRMZM2G010091
## [25] GRMZM2G010555 GRMZM2G010765 GRMZM2G014193 GRMZM2G014770
## [29] GRMZM2G015324 GRMZM2G015401 GRMZM2G015955 GRMZM2G017110
## [33] GRMZM2G017197 GRMZM2G018798 GRMZM2G019183 GRMZM2G020742
## [37] GRMZM2G020928 GRMZM2G021482 GRMZM2G021624 GRMZM2G022090
## [41] GRMZM2G022403 GRMZM2G023711 GRMZM2G025154 GRMZM2G025231
## [45] GRMZM2G026922 GRMZM2G028129 GRMZM2G028637 GRMZM2G030241
## [49] GRMZM2G031169 GRMZM2G032218 GRMZM2G032807 GRMZM2G035779
## [53] GRMZM2G036217 GRMZM2G036609 GRMZM2G036708 GRMZM2G037146
## [57] GRMZM2G037630 GRMZM2G038153 GRMZM2G038279 GRMZM2G038536
## [61] GRMZM2G040975 GRMZM2G044107 GRMZM2G044629 GRMZM2G045005
## [65] GRMZM2G046407 GRMZM2G048733 GRMZM2G049538 GRMZM2G052365
## [69] GRMZM2G053319 GRMZM2G053458 GRMZM2G053916 GRMZM2G054115
## [73] GRMZM2G057067 GRMZM2G057930 GRMZM2G058948 GRMZM2G059706
## [77] GRMZM2G059753 GRMZM2G060029 GRMZM2G060079 GRMZM2G060257
## [81] GRMZM2G060647 GRMZM2G060742 GRMZM2G060837 GRMZM2G060866
## [85] GRMZM2G060919 GRMZM2G062488 GRMZM2G063851 GRMZM2G063868
## [89] GRMZM2G064083 GRMZM2G065899 GRMZM2G066578 GRMZM2G067277
## [93] GRMZM2G067417 GRMZM2G067555 GRMZM2G068476 GRMZM2G068707
## [97] GRMZM2G070515 GRMZM2G071119 GRMZM2G073427 GRMZM2G073826
## [101] GRMZM2G074604 GRMZM2G075051 GRMZM2G075336 GRMZM2G075496
## [105] GRMZM2G076392 GRMZM2G076423 GRMZM2G076802 GRMZM2G077415
## [109] GRMZM2G078090 GRMZM2G078416 GRMZM2G078500 GRMZM2G081529
## [113] GRMZM2G082185 GRMZM2G082203 GRMZM2G083402 GRMZM2G083847
## [117] GRMZM2G084279 GRMZM2G084445 GRMZM2G085078 GRMZM2G085301
## [121] GRMZM2G085381 GRMZM2G085550 GRMZM2G085967 GRMZM2G087323
## [125] GRMZM2G092125 GRMZM2G092232 GRMZM2G092947 GRMZM2G093441
## [129] GRMZM2G093623 GRMZM2G095164 GRMZM2G096470 GRMZM2G103812
## [133] GRMZM2G103900 GRMZM2G104147 GRMZM2G104511 GRMZM2G106600
## [137] GRMZM2G109286 GRMZM2G109464 GRMZM2G110140 GRMZM2G111269
## [141] GRMZM2G112149 GRMZM2G112336 GRMZM2G113052 GRMZM2G113373
## [145] GRMZM2G114873 GRMZM2G114899 GRMZM2G117064 GRMZM2G117198
## [149] GRMZM2G117507 GRMZM2G117796 GRMZM2G117956 GRMZM2G119261
## [153] GRMZM2G119745 GRMZM2G122231 GRMZM2G122999 GRMZM2G123140
## [157] GRMZM2G123585 GRMZM2G123977 GRMZM2G124317 GRMZM2G124434
## [161] GRMZM2G126261 GRMZM2G127404 GRMZM2G130173 GRMZM2G131785
## [165] GRMZM2G133838 GRMZM2G135091 GRMZM2G135195 GRMZM2G135446
## [169] GRMZM2G135816 GRMZM2G136106 GRMZM2G137120 GRMZM2G137930
## [173] GRMZM2G138152 GRMZM2G139583 GRMZM2G140614 GRMZM2G141216
## [177] GRMZM2G141600 GRMZM2G142709 GRMZM2G142873 GRMZM2G142984
## [181] GRMZM2G143139 GRMZM2G144083 GRMZM2G145396 GRMZM2G145578
## [185] GRMZM2G147716 GRMZM2G147775 GRMZM2G148387 GRMZM2G149145
## [189] GRMZM2G149201 GRMZM2G149553 GRMZM2G150295 GRMZM2G150893
## [193] GRMZM2G155021 GRMZM2G155321 GRMZM2G155502 GRMZM2G157027
## [197] GRMZM2G157147 GRMZM2G158972 GRMZM2G161566 GRMZM2G161728
## [201] GRMZM2G161809 GRMZM2G161988 GRMZM2G162486 GRMZM2G162690
## [205] GRMZM2G162758 GRMZM2G162814 GRMZM2G163154 GRMZM2G163200
## [209] GRMZM2G163514 GRMZM2G163826 GRMZM2G164062 GRMZM2G164502
## [213] GRMZM2G164669 GRMZM2G165390 GRMZM2G165961 GRMZM2G166639
## [217] GRMZM2G166719 GRMZM2G166767 GRMZM2G168386 GRMZM2G170805
## [221] GRMZM2G171367 GRMZM2G172043 GRMZM2G172512 GRMZM2G172584
## [225] GRMZM2G173882 GRMZM2G174145 GRMZM2G174708 GRMZM2G177349
## [229] GRMZM2G177386 GRMZM2G177659 GRMZM2G178693 GRMZM2G178787
## [233] GRMZM2G181371 GRMZM2G312521 GRMZM2G315375 GRMZM2G315806
## [237] GRMZM2G320023 GRMZM2G324248 GRMZM2G324462 GRMZM2G327234
## [241] GRMZM2G328893 GRMZM2G330302 GRMZM2G332259 GRMZM2G334899
## [245] GRMZM2G342243 GRMZM2G343588 GRMZM2G359331 GRMZM2G360455
## [249] GRMZM2G365961 GRMZM2G370081 GRMZM2G377780 GRMZM2G379005
## [253] GRMZM2G380732 GRMZM2G382273 GRMZM2G392125 GRMZM2G405178
## [257] GRMZM2G405203 GRMZM2G410352 GRMZM2G410991 GRMZM2G411159
## [261] GRMZM2G411956 GRMZM2G421279 GRMZM2G424873 GRMZM2G426917
## [265] GRMZM2G430362 GRMZM2G440208 GRMZM2G451231 GRMZM2G466780
## [269] GRMZM2G467992 GRMZM2G468132 GRMZM2G469898 GRMZM2G472376
## [273] GRMZM2G473001 GRMZM2G475017 GRMZM2G477200 GRMZM2G477743
## [277] GRMZM2G587368 GRMZM5G812660 GRMZM5G813055 GRMZM5G822137
## [281] GRMZM5G826765 GRMZM5G829840 GRMZM5G830874 GRMZM5G832740
## [285] GRMZM5G834758 GRMZM5G837606 GRMZM5G855337 GRMZM5G859195
## [289] GRMZM5G872118 GRMZM5G872499 GRMZM5G877388 GRMZM5G877500
## [293] GRMZM5G883759 GRMZM5G886561 GRMZM5G888263 GRMZM5G891187
## [297] GRMZM5G891944 GRMZM5G898740
## 39475 Levels: AC148152.3_FG001 AC148152.3_FG005 ... GRMZM6G998221
How many are also domestication genes?
## [1] 6
Let’s list those too.
## [1] GRMZM2G009282 GRMZM2G014193 GRMZM2G060079 GRMZM2G104147 GRMZM2G147716
## [6] GRMZM2G177349
## 39475 Levels: AC148152.3_FG001 AC148152.3_FG005 ... GRMZM6G998221
What percent of random samples of similar size have more domestication genes.
## [1] 0.758
Are there more cis-regulated genes in those candidates (DE teo, not DE maize, DE maize vs. teosinte) that were also selected?
#category of non-selection candidates
table(filter(zam.candidates,zam.candidates$Domestication==FALSE)$Reg.Cat.Leaf)/sum(table(filter(zam.candidates,zam.candidates$Domestication==FALSE)$Reg.Cat.Leaf))
##
## Ambiguous Cis + Trans Cis only Cis x Trans Compensatory
## 0.10273973 0.29109589 0.23630137 0.07534247 0.01027397
## Conserved Trans only
## 0.03424658 0.25000000
table(filter(zam.candidates,zam.candidates$Domestication==TRUE)$Reg.Cat.Leaf)/sum(table(filter(zam.candidates,zam.candidates$Domestication==TRUE)$Reg.Cat.Leaf))
##
## Ambiguous Cis + Trans Cis only Cis x Trans Compensatory
## 0.0000000 0.5000000 0.1666667 0.0000000 0.0000000
## Conserved Trans only
## 0.0000000 0.3333333