Setaria, Maize & Sorghum differential expression analysis using Bayseq

library('baySeq')

## Loading required package: GenomicRanges
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## 
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## 
## The following object is masked from 'package:stats':
## 
##     xtabs
## 
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, as.vector, cbind,
##     colnames, duplicated, eval, evalq, Filter, Find, get,
##     intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rep.int, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unlist
## 
## Loading required package: IRanges
## Loading required package: XVector
## 
## Attaching package: 'baySeq'
## 
## The following object is masked from 'package:GenomicRanges':
## 
##     rbind
## 
## The following object is masked from 'package:IRanges':
## 
##     rbind
## 
## The following object is masked from 'package:BiocGenerics':
## 
##     rbind
## 
## The following object is masked from 'package:base':
## 
##     rbind

library('ggplot2')

Because Bayseq is designed to run massive experiments is better to use parallel processing.

cl <- makeCluster(6)

Prepare data by merging all datasets and use one single annotation (Sorghum)

setwd("~/documents/data/sorghum/Si_SB_Zm")
Setaria <- read.delim("RSEMcountsfinal_Si.txt", header=TRUE, sep ="\t", as.is=TRUE)
Setaria$gene <-gsub(".g", "", Setaria$gene)

Maize <- read.delim("RSEMcountsfinal_Zm.txt", header=TRUE, sep ="\t", as.is =TRUE)

Sorghum <- read.delim("RSEMcountsfinal_Sb.txt", header=TRUE, sep ="\t", as.is =TRUE)
colnames(Sorghum) <- c("gene", "sM1","sM2","sM3","sBS1","sBS2","sBS3")


annotation <- read.delim("sb-si-zm.csv", header=FALSE, sep ="\t", as.is=TRUE)
annotation <- annotation[,c(-1,-3,-5,-6)]
names(annotation) <- c("Si_Zm","Sb")


annotation$Sb <-gsub("\\.1$", "", annotation$Sb)
annotation$Si_Zm <- gsub("FGT([0-9]{3})$", "FG\\1",annotation$Si_Zm)
annotation$Si_Zm <- gsub("_T[0-9][0-9]$", "",annotation$Si_Zm)

head(Setaria)

##        gene   M1   M2   M3  BS1  BS2  BS3
## 1 Si000001m  645  644  616  818  616  756
## 2 Si000002m 2102 3658 2466 3219 2877 2209
## 3 Si000003m  201  516  349 2345 1699 1361
## 4 Si000006m  378  601  476 4405 4559 4435
## 5 Si000007m 1170 1851 1320 1246 1315 1046
## 6 Si000008m 2309 3986 2647 7187 6101 5292

head(Maize)

##               gene CMR1 CMR2 CBSR1 CBSR2
## 1 AC148152.3_FG001    1    1     1     2
## 2 AC148152.3_FG008 2497 2571  2486  2180
## 3 AC148167.6_FG001 1271 1372  1283  1236
## 4 AC149475.2_FG002    4    5    13     3
## 5 AC149475.2_FG003  160  146   343   278
## 6 AC149475.2_FG005  311  307   662   575

head(Sorghum)

##            gene sM1  sM2  sM3 sBS1 sBS2 sBS3
## 1 Sb0010s003420 163  121  206  144  190  165
## 2 Sb0010s003440  43   25   37   94  111  108
## 3 Sb0010s007570 525 1062 1220   76   97  108
## 4 Sb0010s007790  89   95  161   25   64   55
## 5 Sb0010s010920   1    5    7    1    2    1
## 6 Sb0010s012040   8   16   14   33   42   20

head(annotation)

##       Si_Zm          Sb
## 1 Si001144m Sb03g004970
## 2 Si001411m Sb03g006170
## 3 Si000179m Sb03g034680
## 4 Si000865m Sb03g035960
## 5 Si000413m Sb03g008580
## 6 Si005128m Sb03g005410

Merge the annotation dataframe with each species independently. If I merge the 3 sets together at this point, I can loose information about genes that are present in some species but not in others. For example Si026007m is M-specific in both Si and Sb but it does not appear in Zm.

Sibayseqdata <- merge(x = annotation, y = Setaria,by.x ="Si_Zm", by.y="gene", all = F)
Zmbayseqdata <- merge(x = annotation, y = Maize,by.x ="Si_Zm", by.y="gene", all = F)
Sbbayseqdata <- merge(x = annotation, y = Sorghum,by.x ="Sb", by.y="gene", all = F)

Since Sb annotation matches more than one identifier in Setaria and Maize I normalized all counts by summing instances matching the same Sb identifier. I am just summing instances since bayseq does its own normalization. My concern is that we may be retrieving

library(plyr)

## 
## Attaching package: 'plyr'
## 
## The following object is masked from 'package:XVector':
## 
##     compact
## 
## The following objects are masked from 'package:IRanges':
## 
##     desc, rename

Sibayseqdatanorm <- ddply(Sibayseqdata, 'Sb', numcolwise(sum))
head(Sibayseqdatanorm)

##              Sb   M1   M2   M3 BS1 BS2 BS3
## 1 Sb0010s003420  548  784  537 670 506 568
## 2 Sb0010s007570  167 1620  414 175  98 118
## 3 Sb0010s007790  138  247  184  39  31  33
## 4 Sb0010s010920    1    2    1   4   5   6
## 5 Sb0010s013050    1    1    2   6  17  18
## 6 Sb0010s016080 2848 2569 2182 546 748 840

Sbbayseqdatanorm <- ddply(Sbbayseqdata, 'Sb', numcolwise(sum))
head(Sbbayseqdatanorm)

##              Sb  sM1  sM2  sM3 sBS1 sBS2 sBS3
## 1 Sb0010s003420  489  363  618  432  570  495
## 2 Sb0010s007570 1575 3186 3660  228  291  324
## 3 Sb0010s007790   89   95  161   25   64   55
## 4 Sb0010s010920    2   10   14    2    4    2
## 5 Sb0010s012040   16   32   28   66   84   40
## 6 Sb0010s013040   10    2    6   12   12   14

Zmbayseqdatanorm <- ddply(Zmbayseqdata, 'Sb', numcolwise(sum))
head(Zmbayseqdatanorm)

##              Sb CMR1 CMR2 CBSR1 CBSR2
## 1 Sb0010s003420  857  752  1327  1373
## 2 Sb0010s003450    1    1     1     3
## 3 Sb0010s007570    6    4     1     1
## 4 Sb0010s010920    1    1    14     5
## 5 Sb0010s012040    1    3     1     1
## 6 Sb0010s013040    1    1     2     2

Then merged everything

bayseqdata <- merge(x= Sbbayseqdatanorm, y=Sibayseqdatanorm , by.x = "Sb", by.y="Sb", all = F)
colnames(bayseqdata) <- c("Sb", 
                          "sM1","sM2","sM3","sBS1","sBS2","sBS3", "M1", "M2","M3",
                          "BS1","BS2","BS3")
head(bayseqdata)

##              Sb  sM1  sM2  sM3 sBS1 sBS2 sBS3   M1   M2   M3 BS1 BS2 BS3
## 1 Sb0010s003420  489  363  618  432  570  495  548  784  537 670 506 568
## 2 Sb0010s007570 1575 3186 3660  228  291  324  167 1620  414 175  98 118
## 3 Sb0010s007790   89   95  161   25   64   55  138  247  184  39  31  33
## 4 Sb0010s010920    2   10   14    2    4    2    1    2    1   4   5   6
## 5 Sb0010s013050    6   18   36  129  114  126    1    1    2   6  17  18
## 6 Sb0010s016080 3280 3370 5052 1216 1996 3002 2848 2569 2182 546 748 840

bayseqdata <- merge(x= bayseqdata, y=Zmbayseqdatanorm , by.x = "Sb", by.y="Sb", all = F)
colnames(bayseqdata) <- c("genes", 
                          "sbM1","sbM2","sbM3","sbBS1","sbBS2","sbBS3", "siM1", "siM2","siM3",
                          "siBS1","siBS2","siBS3","zmM1","zmM2","zmBS1","zmBS2")


bayseqdata <- unique(bayseqdata)
head(bayseqdata)

##           genes sbM1 sbM2 sbM3 sbBS1 sbBS2 sbBS3 siM1 siM2 siM3 siBS1
## 1 Sb0010s003420  489  363  618   432   570   495  548  784  537   670
## 2 Sb0010s007570 1575 3186 3660   228   291   324  167 1620  414   175
## 3 Sb0010s010920    2   10   14     2     4     2    1    2    1     4
## 4 Sb0010s013050    6   18   36   129   114   126    1    1    2     6
## 5 Sb0010s016080 3280 3370 5052  1216  1996  3002 2848 2569 2182   546
## 6 Sb0010s017130    3    3    6    18     3     6    5   10    6     6
##   siBS2 siBS3 zmM1 zmM2 zmBS1 zmBS2
## 1   506   568  857  752  1327  1373
## 2    98   118    6    4     1     1
## 3     5     6    1    1    14     5
## 4    17    18    1    1   326   128
## 5   748   840 5690 5414  1292  1090
## 6     5    14    2    4    18    22

counts <- bayseqdata[,-1]
head(counts)

##   sbM1 sbM2 sbM3 sbBS1 sbBS2 sbBS3 siM1 siM2 siM3 siBS1 siBS2 siBS3 zmM1
## 1  489  363  618   432   570   495  548  784  537   670   506   568  857
## 2 1575 3186 3660   228   291   324  167 1620  414   175    98   118    6
## 3    2   10   14     2     4     2    1    2    1     4     5     6    1
## 4    6   18   36   129   114   126    1    1    2     6    17    18    1
## 5 3280 3370 5052  1216  1996  3002 2848 2569 2182   546   748   840 5690
## 6    3    3    6    18     3     6    5   10    6     6     5    14    2
##   zmM2 zmBS1 zmBS2
## 1  752  1327  1373
## 2    4     1     1
## 3    1    14     5
## 4    1   326   128
## 5 5414  1292  1090
## 6    4    18    22

rownames(counts) <- bayseqdata$genes
head(counts)

##               sbM1 sbM2 sbM3 sbBS1 sbBS2 sbBS3 siM1 siM2 siM3 siBS1 siBS2
## Sb0010s003420  489  363  618   432   570   495  548  784  537   670   506
## Sb0010s007570 1575 3186 3660   228   291   324  167 1620  414   175    98
## Sb0010s010920    2   10   14     2     4     2    1    2    1     4     5
## Sb0010s013050    6   18   36   129   114   126    1    1    2     6    17
## Sb0010s016080 3280 3370 5052  1216  1996  3002 2848 2569 2182   546   748
## Sb0010s017130    3    3    6    18     3     6    5   10    6     6     5
##               siBS3 zmM1 zmM2 zmBS1 zmBS2
## Sb0010s003420   568  857  752  1327  1373
## Sb0010s007570   118    6    4     1     1
## Sb0010s010920     6    1    1    14     5
## Sb0010s013050    18    1    1   326   128
## Sb0010s016080   840 5690 5414  1292  1090
## Sb0010s017130    14    2    4    18    22

library(reshape2)
cm <- melt(cor(counts))
cm$Var1 <- as.factor(cm$Var1)
cm$Var2 <- as.factor(cm$Var2)
mid<-(max(cm$value, na.rm=TRUE)+min(cm$value, na.rm=TRUE))/2

Before starting bayseq is sensible to do some exploration of the data.

Check count correlation

ggplot(data=cm, aes(x=Var1, y=Var2)) +
  geom_tile(aes(fill=value)) +
  ggtitle("Effective Count Correlation matrix") +
  geom_text(aes(label = round(value, 2))) +
  xlab('') +
  ylab('') +
  scale_fill_gradient2(low="cyan", mid="yellow", high="red", midpoint=mid, space="Lab", na.value = "grey50", guide="colorbar")

plot of chunk unnamed-chunk-8

Log distributions of Effective Counts

cf <- as.data.frame(counts)
head(cf)

##               sbM1 sbM2 sbM3 sbBS1 sbBS2 sbBS3 siM1 siM2 siM3 siBS1 siBS2
## Sb0010s003420  489  363  618   432   570   495  548  784  537   670   506
## Sb0010s007570 1575 3186 3660   228   291   324  167 1620  414   175    98
## Sb0010s010920    2   10   14     2     4     2    1    2    1     4     5
## Sb0010s013050    6   18   36   129   114   126    1    1    2     6    17
## Sb0010s016080 3280 3370 5052  1216  1996  3002 2848 2569 2182   546   748
## Sb0010s017130    3    3    6    18     3     6    5   10    6     6     5
##               siBS3 zmM1 zmM2 zmBS1 zmBS2
## Sb0010s003420   568  857  752  1327  1373
## Sb0010s007570   118    6    4     1     1
## Sb0010s010920     6    1    1    14     5
## Sb0010s013050    18    1    1   326   128
## Sb0010s016080   840 5690 5414  1292  1090
## Sb0010s017130    14    2    4    18    22

cf$contig <- rownames(counts)
counts_melt <- melt(cf, id='contig')
ggplot(counts_melt, aes(x=log(value), colour=variable)) +
  geom_density() +
  ggtitle("Effective Count Log Density")

plot of chunk unnamed-chunk-9

Box plot with outliers

ggplot(counts_melt, aes(x=variable, y=value, colour=variable)) + 
  geom_boxplot() +
  ggtitle("Boxplots")

plot of chunk unnamed-chunk-10

Plot distribution of counts over 50,000

counts_50k <- counts_melt[counts_melt$value > 50000,]
ggplot(counts_50k, aes(x=variable, y=value, colour=variable)) + 
  geom_boxplot() +
  ggtitle("Boxplot of Outliers")

plot of chunk unnamed-chunk-11 Apparently the majority of the outliers come from Sb.

Run Bayseq on merged samples

row.names(bayseqdata) <-bayseqdata$genes
bayseqdata <-bayseqdata[,-1]
bayseqmatrix <- data.matrix(bayseqdata)

Defining our replicate and group structure

replicates <- as.factor(c("sbm","sbm","sbm",
                          "sbbs","sbbs","sbbs",
                          "sim","sim","sim",
                          "sibs","sibs","sibs",
                          "zmm","zmm","zmbs","zmbs")) 
groups <- list(NDE = factor(rep(1,16)),
               DE = factor(c("m","m","m","bs","bs","bs",
                                      "m","m","m","bs","bs","bs",
                                      "m","m","bs","bs")))

Combine the count data and user defined groups into a countData object

CD <- new("countData",
          data = bayseqmatrix,
          replicates = replicates, 
          groups = groups)

Libray sizes can be inferred from the data if the user is not able to supply Here we take the sum of the reads below the nth percentile of the data( instead of just using the total number of sequenced reads aligning to the genome)

libsizes(CD) <- getLibsizes(CD, estimationType= "quantile")
head(libsizes(CD))

##    sbM1    sbM2    sbM3   sbBS1   sbBS2   sbBS3 
## 2436094 1828404 2734902 3749472 4541815 4581751

We estimate an empirical distribution on the parameters of the Negative Binomial distribution by bootstraping from the data, taking individual counts and finding quasi-likelihood parameters for a Negative Binomial distribution

CD <- getPriors.NB(CD, samplesize = 10000, estimation = "QL", cl = cl)

## Finding priors...done.

Before getting posterior likelihoods for the data is good to consider the possibility that some loci will not be expressed at all in the data

plotPriors(CD, group= "NDE")

plot of chunk unnamed-chunk-17

Acquire posterior likelihoods, estimating the proportinos of DE expressed counts

CD <- getLikelihoods.NB(CD, pET = 'BIC', cl = cl)

## Finding posterior likelihoods...

## .

## done.

Ask for the top candidates for differential expression using the topCounts function and generate a normalized output object we can export

topCounts(CD, group ="DE")

##             rowID    sbM1    sbM2    sbM3  sbBS1  sbBS2  sbBS3   siM1
## Sb01g000570    75    4222    3758    4452 122756 127684 132314  21789
## Sb01g011780   826   80796  110784  128040   1707   2766   2352  50288
## Sb01g015400  1027 1464196 1966940 1898912 837736 665744 739016 147316
## Sb01g028990  1512       8       8      16    364    408    310      6
## Sb01g033070  1733      20      28      38    464    380    320     32
## Sb01g033620  1763     906    1028    1480  58616  35784  40440    176
## Sb01g039550  2158      22      26      48   1616   2240   2234      4
## Sb01g039980  2192    6256    9102    8126 143994 131466 152576   4447
## Sb01g040720  2242     120      72      87   6942   8190   6279    234
## Sb01g042450  2370      87     153      96  23571  29970  11931    246
##               siM2   siM3  siBS1  siBS2  siBS3   zmM1   zmM2  zmBS1  zmBS2
## Sb01g000570  36054  22170  68529  68481  62004   8841   9691  50247  48546
## Sb01g011780  35917  36661   1928   3244   1991   4383   3197    290    245
## Sb01g015400 281939 158370 223834 275703 264105 425953 448833 105406 143382
## Sb01g028990      7      5    215    178    305      2      2    170    121
## Sb01g033070     23     26   3226   1250   1543      7      7   2733   1849
## Sb01g033620    196    211  19950  12198  11709     49     52  32226  33098
## Sb01g039550      5      5    954    846    959      7      4   1362   1123
## Sb01g039980   6649   3979  66849  87667  70871   1742   1675  23618  23261
## Sb01g040720    255    224   2742   2626   3056   1141   1043 449368 461074
## Sb01g042450    217    183  17509  20338  31942     62     61   9659   9259
##             Likelihood   DE FDR.DE
## Sb01g000570          1 bs>m      0
## Sb01g011780          1 m>bs      0
## Sb01g015400          1 m>bs      0
## Sb01g028990          1 bs>m      0
## Sb01g033070          1 bs>m      0
## Sb01g033620          1 bs>m      0
## Sb01g039550          1 bs>m      0
## Sb01g039980          1 bs>m      0
## Sb01g040720          1 bs>m      0
## Sb01g042450          1 bs>m      0

output_DE <- topCounts(CD, group = "DE", number = 100000, normaliseData=TRUE)
write.table(output_DE, file="monocots_bayseq.txt", sep="\t", row.names=TRUE, quote=F)

Manually calculate logfold changes

head(output_DE)

##             rowID    sbM1    sbM2    sbM3  sbBS1  sbBS2  sbBS3   siM1
## Sb01g000570    75    4117    4882    3867  77765  66775  68594  21845
## Sb01g011780   826   78778  143918  111202   1081   1447   1219  50417
## Sb01g015400  1027 1427626 2555219 1649196 530697 348167 383117 147692
## Sb01g028990  1512       8      10      14    231    213    161      6
## Sb01g033070  1733      20      36      33    294    199    166     32
## Sb01g033620  1763     883    1335    1285  37133  18714  20965    176
##               siM2   siM3  siBS1  siBS2  siBS3   zmM1   zmM2  zmBS1  zmBS2
## Sb01g000570  25447  22532  73032  76463  65470  15766  19192  61108  66208
## Sb01g011780  25350  37260   2055   3622   2102   7816   6331    353    334
## Sb01g015400 198992 160959 238542 307836 278870 759588 888866 128190 195547
## Sb01g028990      5      5    229    199    322      4      4    207    165
## Sb01g033070     16     26   3438   1396   1629     12     14   3324   2522
## Sb01g033620    138    214  21261  13620  12364     87    103  39192  45140
##             Likelihood   DE FDR.DE
## Sb01g000570          1 bs>m      0
## Sb01g011780          1 m>bs      0
## Sb01g015400          1 m>bs      0
## Sb01g028990          1 bs>m      0
## Sb01g033070          1 bs>m      0
## Sb01g033620          1 bs>m      0

output_DE["genes"] <-rownames(output_DE)
output_DE[c("log2FC_Sb","log2FC_Si","log2FC_Zm","log2FC_all")] <-NA
output_DE[c("sbm","sbbs","sim","sibs","zmm","zmbs")] <-NA
output_DE <- output_DE[,-1]
output_DE$sbm = rowMeans(output_DE[,c(1:3)])
output_DE$sbbs = rowMeans(output_DE[,c(4:6)])
output_DE$sim = rowMeans(output_DE[,c(7:9)])
output_DE$sibs = rowMeans(output_DE[,c(10:12)])
output_DE$zmm = rowMeans(output_DE[,c(13:14)])
output_DE$zmbs = rowMeans(output_DE[,c(15:16)])
output_DE$log2FC_Sb = log2((output_DE$sbbs/output_DE$sbm))
output_DE$log2FC_Si = log2((output_DE$sibs/output_DE$sim))
output_DE$log2FC_Zm = log2((output_DE$zmbs/output_DE$zmm))
output_DE$log2FC_all = output_DE$log2FC_Sb + output_DE$log2FC_Si + output_DE$log2FC_Zm
output_DE$log2FC_all = output_DE$log2FC_all/3
head(output_DE)

##                sbM1    sbM2    sbM3  sbBS1  sbBS2  sbBS3   siM1   siM2
## Sb01g000570    4117    4882    3867  77765  66775  68594  21845  25447
## Sb01g011780   78778  143918  111202   1081   1447   1219  50417  25350
## Sb01g015400 1427626 2555219 1649196 530697 348167 383117 147692 198992
## Sb01g028990       8      10      14    231    213    161      6      5
## Sb01g033070      20      36      33    294    199    166     32     16
## Sb01g033620     883    1335    1285  37133  18714  20965    176    138
##               siM3  siBS1  siBS2  siBS3   zmM1   zmM2  zmBS1  zmBS2
## Sb01g000570  22532  73032  76463  65470  15766  19192  61108  66208
## Sb01g011780  37260   2055   3622   2102   7816   6331    353    334
## Sb01g015400 160959 238542 307836 278870 759588 888866 128190 195547
## Sb01g028990      5    229    199    322      4      4    207    165
## Sb01g033070     26   3438   1396   1629     12     14   3324   2522
## Sb01g033620    214  21261  13620  12364     87    103  39192  45140
##             Likelihood   DE FDR.DE       genes log2FC_Sb log2FC_Si
## Sb01g000570          1 bs>m      0 Sb01g000570     4.050     1.622
## Sb01g011780          1 m>bs      0 Sb01g011780    -6.478    -3.861
## Sb01g015400          1 m>bs      0 Sb01g015400    -2.158     0.701
## Sb01g028990          1 bs>m      0 Sb01g028990     4.241     5.551
## Sb01g033070          1 bs>m      0 Sb01g033070     2.888     6.449
## Sb01g033620          1 bs>m      0 Sb01g033620     4.455     6.483
##             log2FC_Zm log2FC_all       sbm     sbbs       sim   sibs
## Sb01g000570     1.865      2.512 4.289e+03  71044.7 2.327e+04  71655
## Sb01g011780    -4.364     -4.901 1.113e+05   1249.0 3.768e+04   2593
## Sb01g015400    -2.348     -1.268 1.877e+06 420660.3 1.692e+05 275083
## Sb01g028990     5.539      5.110 1.067e+01    201.7 5.333e+00    250
## Sb01g033070     7.813      5.717 2.967e+01    219.7 2.467e+01   2154
## Sb01g033620     8.794      6.577 1.168e+03  25604.0 1.760e+02  15748
##                zmm     zmbs
## Sb01g000570  17479  63658.0
## Sb01g011780   7074    343.5
## Sb01g015400 824227 161868.5
## Sb01g028990      4    186.0
## Sb01g033070     13   2923.0
## Sb01g033620     95  42166.0

C4 gene comparison

First I remove a list of C4 genes from John et al., 2014

setwd("~/documents/data/sorghum/Si_SB_Zm")
C4 <- read.csv("Si_C4.csv", header=TRUE,as.is =T )
C4$Identifier <-gsub(".g", "", C4$Identifier)
C4 <- merge(x = annotation, y = C4,by.x ="Si_Zm", by.y="Identifier", all = F)
C4 <-C4[,c(-5:-12)]
colnames(C4) <- c("Si","Sb","gene","role")
C4 <- merge(x =C4, y=output_DE, by.x= "Sb", by.y= "genes", all=FALSE)

Plot each species C4 genes

require(scatterplot3d)

## Loading required package: scatterplot3d

thdp <- scatterplot3d(output_DE$log2FC_Sb ,output_DE$log2FC_Si ,output_DE$log2FC_Zm ,lwd = 0.5, pch =20,  main="Log2FC", highlight.3d=T,angle=100)

plot of chunk unnamed-chunk-22 Do a heatmap of the C4 genes

C4<-C4[order(C4$DE),]

C4hm <-C4[,c(3,24,25,26)]
row.names(C4hm) <-C4hm$gene
C4hm <-C4hm[,-1]
C4hm <-as.matrix(C4hm)


library(gplots)

## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
## 
## Attaching package: 'gplots'
## 
## The following object is masked from 'package:IRanges':
## 
##     space
## 
## The following object is masked from 'package:stats':
## 
##     lowess

heatmap.2(C4hm, Rowv=T, Colv=F, 
          dendrogram="none", col=colorRampPalette(c("blue","yellow"),100), 
          key=T, symkey=T, keysize=1.0, density.info="none",symbreaks=F,
          trace="none",labCol=c(expression(italic("S.bicolor"), 
                                           italic("S.italica"),
                                           italic("Z.mays"))) ,

          srtCol=20)

plot of chunk unnamed-chunk-24