Fulani_Mossi

Differential gene expression

The high correlations among the libraries do suggest that they despite the initial questionmarks regarding quality might be useful for detection of differential gene expression.

For detection of differential gene expression it is beneficial to omit genes with 0 or is very lowly expressed. Here I omit genes that have fewer than 1 count per million mapped reads in more than 3 libraries. Following this the voom function that transform count data to log counts per million and estimates the mean-variance relationship (In short makes the count data suitable for linear modelling). The MDS plot can be seen as a kind of PCA plot and distance between samples reflects gene expression differences among samples. From the plots one can deduce that there are no large gene expression differences among population and status (Infected or not) as there are no clear clustering of replicates.

# Create count data filtered for lowly expressed genes
table(isexpr <- rowSums(cpm(Monocyte.counts) > 1) >= 3)

## 
## FALSE  TRUE 
## 50927 12225

Monocyte.counts.filt <- Monocyte.counts[isexpr,]
monocyte.voom <- voom(Monocyte.counts.filt)

table(isexpr <- rowSums(cpm(Lymfocyte.counts) > 1) >= 3)

## 
## FALSE  TRUE 
## 49284 13868

Lymfocyte.counts.filt <- Lymfocyte.counts[isexpr,]
lymfocyte.voom <- voom(Lymfocyte.counts.filt)
plotMDS(monocyte.voom, labels = paste(substr(Monocyte.info$Status, 1, 2),substr(Monocyte.info$Population, 1, 2), sep = '.'), main="MDS plot Monocytes")

plotMDS(lymfocyte.voom, labels = paste(substr(Lymfocyte.info$Status, 1, 2),substr(Lymfocyte.info$Population, 1, 2), sep = '.'), main="MDS plot Lymfocytes")

Mono.PS <- paste(Monocyte.info$Population, Monocyte.info$Status, sep=".")
Mono.PS <- factor(Mono.PS, levels=c("Fulani.Infected","Mossi.Uninfected","Fulani.Uninfected","Mossi.Infected"))
monocyte.design <- model.matrix(~0 + Mono.PS)
colnames(monocyte.design) <- levels(Mono.PS)

Lymfo.PS <- paste(Lymfocyte.info$Population, Lymfocyte.info$Status, sep=".")
Lymfo.PS <- factor(Lymfo.PS, levels=c("Fulani.Infected","Mossi.Uninfected","Fulani.Uninfected","Mossi.Infected"))
lymfocyte.design <- model.matrix(~0 + Lymfo.PS)
colnames(lymfocyte.design) <- levels(Lymfo.PS)

The converted gene counts are fitted in a linear model using the previously created design matrixes. The contrasts of interested are then extracted from the complete model and test statistics are computed with the function eBayes.

monocyte.fit <- lmFit(monocyte.voom, monocyte.design)
monocyte.cont.matrix <- makeContrasts(UninfMovsUninfFu=Mossi.Uninfected-Fulani.Uninfected, UninfFuvsInfFu=Fulani.Uninfected-Fulani.Infected, UninfMovsInfMo=Mossi.Uninfected-Mossi.Infected, InfMovsInfFu=Mossi.Infected-Fulani.Infected, levels=monocyte.design)
monocyte.fit2 <- contrasts.fit(monocyte.fit, monocyte.cont.matrix)
monocyte.fit3 <- monocyte.fit2
monocyte.fit2 <- eBayes(monocyte.fit2)

lymfocyte.fit <- lmFit(lymfocyte.voom, lymfocyte.design)
lymfocyte.cont.matrix <- makeContrasts(UninfMovsUninfFu=Mossi.Uninfected-Fulani.Uninfected, UninfFuvsInfFu=Fulani.Uninfected-Fulani.Infected, UninfMovsInfMo=Mossi.Uninfected-Mossi.Infected, InfMovsInfFu=Mossi.Infected-Fulani.Infected, levels=lymfocyte.design)
lymfocyte.fit2 <- contrasts.fit(lymfocyte.fit, lymfocyte.cont.matrix)
lymfocyte.fit2 <- eBayes(lymfocyte.fit2)

Below are the summaries from the analysis if one sets the cut-off at adjusted p-value < 0.05. -1 and 1 represents genes that have a significantly lower or higher expression level in the first sample of the the tested contrast, whereas the 0 values represents the non-significant genes. I have not used any cut-off for the fold-change, but can do so if you have any preferences.

library(biomaRt)

## Creating a generic function for 'nchar' from package 'base' in package 'S4Vectors'

ensembl <- useMart("ENSEMBL_MART_ENSEMBL", host = "sep2013.archive.ensembl.org") # use the ensembl v. 73 released at september 2013
ensembl <- useDataset('hsapiens_gene_ensembl', mart = ensembl)
lymfocytes.names <- getBM(attributes= c("ensembl_gene_id","external_gene_id", "entrezgene"), filter = "ensembl_gene_id", values=row.names(lymfocyte.fit2$t), mart=ensembl)
monocytes.names <- getBM(attributes= c("ensembl_gene_id","external_gene_id", "entrezgene"), filter = "ensembl_gene_id", values=row.names(monocyte.fit2$t), mart=ensembl)

monocyte.fit2$genes <- monocytes.names[!duplicated(monocytes.names$ensembl_gene_id),]
lymfocyte.fit2$genes <- lymfocytes.names[!duplicated(lymfocytes.names$ensembl_gene_id),]

summary(monocyte.fit2.results <- decideTests(monocyte.fit2))

##    UninfMovsUninfFu UninfFuvsInfFu UninfMovsInfMo InfMovsInfFu
## -1                0             21              0            0
## 0             12225          10621          12225        12225
## 1                 0           1583              0            0

summary(lymfocyte.fit2.results <- decideTests(lymfocyte.fit2))

##    UninfMovsUninfFu UninfFuvsInfFu UninfMovsInfMo InfMovsInfFu
## -1                0              0              0            0
## 0             13868          13868          13868        13868
## 1                 0              0              0            0

topTable(monocyte.fit2, coef = 'UninfFuvsInfFu')

##                 ensembl_gene_id external_gene_id entrezgene logFC AveExpr
## ENSG00000198853 ENSG00000198853            RUSC2       9853 2.364   3.500
## ENSG00000126368 ENSG00000126368            NR1D1       9572 3.193   1.742
## ENSG00000129219 ENSG00000129219             PLD2       5338 1.621   3.672
## ENSG00000161011 ENSG00000161011           SQSTM1       8878 1.691   6.722
## ENSG00000106991 ENSG00000106991              ENG       2022 2.498   2.562
## ENSG00000196923 ENSG00000196923           PDLIM7       9260 2.476   2.231
## ENSG00000242802 ENSG00000242802            AP5Z1       9907 3.115   2.078
## ENSG00000095383 ENSG00000095383           TBC1D2      55357 2.286   3.472
## ENSG00000123136 ENSG00000123136           DDX39A      10212 2.101   3.527
## ENSG00000141503 ENSG00000141503            MINK1      50488 1.741   4.731
##                     t   P.Value adj.P.Val     B
## ENSG00000198853 7.574 7.739e-06   0.01525 4.024
## ENSG00000126368 7.842 5.487e-06   0.01525 3.999
## ENSG00000129219 7.482 8.732e-06   0.01525 3.931
## ENSG00000161011 7.300 1.110e-05   0.01525 3.766
## ENSG00000106991 7.130 1.395e-05   0.01525 3.393
## ENSG00000196923 7.127 1.399e-05   0.01525 3.338
## ENSG00000242802 7.120 1.413e-05   0.01525 3.303
## ENSG00000095383 6.942 1.801e-05   0.01525 3.257
## ENSG00000123136 6.829 2.106e-05   0.01525 3.117
## ENSG00000141503 6.742 2.379e-05   0.01525 3.037

write.fit(monocyte.fit2, results = monocyte.fit2.results, digits = 20, adjust = 'fdr', file = 'MonocytesResults.txt')
write.fit(lymfocyte.fit2, results = lymfocyte.fit2.results, digits = 20, adjust = 'fdr', file = 'lymfocytesResults.txt')

sessionInfo() # Keep track of versions etc

## R version 3.2.1 (2015-06-18)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.10.5 (Yosemite)
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] biomaRt_2.24.0  lattice_0.20-33 edgeR_3.10.2    limma_3.24.15  
## 
## loaded via a namespace (and not attached):
##  [1] AnnotationDbi_1.30.1 knitr_1.11           magrittr_1.5        
##  [4] IRanges_2.2.7        BiocGenerics_0.14.0  GenomeInfoDb_1.4.2  
##  [7] stringr_1.0.0        tools_3.2.1          parallel_3.2.1      
## [10] grid_3.2.1           Biobase_2.28.0       DBI_0.3.1           
## [13] htmltools_0.2.6      yaml_2.1.13          digest_0.6.8        
## [16] formatR_1.2          S4Vectors_0.6.4      bitops_1.0-6        
## [19] RCurl_1.95-4.7       evaluate_0.7.2       RSQLite_1.0.0       
## [22] rmarkdown_0.8        stringi_0.5-5        stats4_3.2.1        
## [25] XML_3.98-1.3

Below is the analysis of GO enrichment using the the package TopGO. I do a correction for multiple testing, but the waf to proceed with multiple testing on the case for GO-enrichment are not trivial so it might be the the results here are fairly conservative. Note that for the test on 21 down-regulated genes no catogories are enriched significantly after correction for multiple testing.

library(topGO)

## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## 
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## 
## The following object is masked from 'package:limma':
## 
##     plotMA
## 
## The following object is masked from 'package:stats':
## 
##     xtabs
## 
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, as.vector, cbind,
##     colnames, do.call, duplicated, eval, evalq, Filter, Find, get,
##     intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rep.int, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unlist, unsplit
## 
## Loading required package: graph
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Loading required package: GO.db
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: GenomeInfoDb
## Loading required package: S4Vectors

## Warning: package 'S4Vectors' was built under R version 3.2.2

## Loading required package: IRanges
## Loading required package: DBI
## 
## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## 
## The following object is masked from 'package:base':
## 
##     backsolve

## 
## groupGOTerms:    GOBPTerm, GOMFTerm, GOCCTerm environments built.

## 
## Attaching package: 'topGO'
## 
## The following object is masked from 'package:IRanges':
## 
##     members

limmaTopGenes <- function(marray.lm, p.val = 0.05, direction = c("both", "up", "down")) {
    require(limma)
    result.table.allgenes <- topTable(marray.lm, coef = 'UninfFuvsInfFu', adjust="BH", number=length(marray.lm$genes[, 1]))
    if (direction == "both") {
        # Use all significant genes in analysis
        # selected.genes <- result.table.allgenes[result.table.allgenes$adj.P.Val < p.val, ]
        # result.table.allgenes[abs(result.table.allgenes$logFC) < 0, ncol(result.table.allgenes)] <- 1
        genes.pval <- setNames(result.table.allgenes$adj.P.Val, result.table.allgenes$ensembl_gene_id)
    } else if (direction == "up") {
        # Use the significant up-regulated genes in analysis, whereas the significant down-regulated genes get adjusted p-values of 1
        result.table.allgenes[result.table.allgenes$logFC < 0, 8] <- 1  
        # selected.genes <- result.table.allgenes[result.table.allgenes$adj.P.Val < p.val, ]
        genes.pval <- setNames(result.table.allgenes$adj.P.Val, result.table.allgenes$ensembl_gene_id)
    } else {
        # Use the significant down-regulated genes in analysis, whereas the significant up-regulated genes get adjusted p-values of 1
        result.table.allgenes[result.table.allgenes$logFC > 0, 8] <- 1
        # selected.genes <- result.table.allgenes[result.table.allgenes$adj.P.Val < p.val, ]
        genes.pval <- setNames(result.table.allgenes$adj.P.Val, result.table.allgenes$ensembl_gene_id)
    } 
}

topDiffGenes <- function(allScore) {
       return(allScore < 0.05)
       }

monocyte.fit2.up.GO <- limmaTopGenes(monocyte.fit2, dir = 'up') 
monocyte.fit2.up.GO.BP <- new("topGOdata", description="GO annotation Monocyte", ontology="BP", allGenes = monocyte.fit2.up.GO, geneSel = topDiffGenes, nodeSize = 10, annot = annFUN.org, mapping="org.Hs.eg.db", ID = "Ensembl")

## 
## Building most specific GOs .....

## Loading required package: org.Hs.eg.db

##  ( 8350 GO terms found. )
## 
## Build GO DAG topology .......... ( 11948 GO terms and 28640 relations. )
## 
## Annotating nodes ............... ( 9213 genes annotated to the GO terms. )

result.fischer.up.GO.BP <- runTest(monocyte.fit2.up.GO.BP, algorithm = "classic", statistic = "fisher")

## 
##           -- Classic Algorithm -- 
## 
##       the algorithm is scoring 4284 nontrivial nodes
##       parameters: 
##           test statistic:  fisher

padj.temp <- length(result.fischer.up.GO.BP@score)
all.monocyte.up.GO.BP <- GenTable(monocyte.fit2.up.GO.BP, classic = result.fischer.up.GO.BP, topNodes = padj.temp)
all.monocyte.up.GO.BP$p.adj <- p.adjust(all.monocyte.up.GO.BP[,ncol(all.monocyte.up.GO.BP)], 'fdr')
all.monocyte.up.GO.BP[all.monocyte.up.GO.BP$p.adj < 0.05,]

##         GO.ID                                        Term Annotated
## 1  GO:0051179                                localization      3172
## 2  GO:0051056 regulation of small GTPase mediated sign...       393
## 3  GO:1902531 regulation of intracellular signal trans...      1080
## 4  GO:0043547      positive regulation of GTPase activity       319
## 5  GO:0051234               establishment of localization      2677
## 6  GO:0030029                actin filament-based process       352
## 7  GO:0044763            single-organism cellular process      6663
## 8  GO:0035556           intracellular signal transduction      1658
## 9  GO:0006810                                   transport      2609
## 10 GO:0007154                          cell communication      3191
## 11 GO:0046578 regulation of Ras protein signal transdu...       377
## 12 GO:0030036             actin cytoskeleton organization       323
## 13 GO:0051716               cellular response to stimulus      3653
## 14 GO:0031291             Ran protein signal transduction       333
## 15 GO:0032011             ARF protein signal transduction       333
## 16 GO:0032012 regulation of ARF protein signal transdu...       333
## 17 GO:0032015 regulation of Ran protein signal transdu...       333
## 18 GO:0032483 regulation of Rab protein signal transdu...       333
## 19 GO:0032484             Ral protein signal transduction       333
## 20 GO:0032485 regulation of Ral protein signal transdu...       333
## 21 GO:0043087               regulation of GTPase activity       333
## 22 GO:0050896                        response to stimulus      4335
## 23 GO:0032488           Cdc42 protein signal transduction       334
## 24 GO:0032489 regulation of Cdc42 protein signal trans...       334
## 25 GO:0035023 regulation of Rho protein signal transdu...       351
## 26 GO:0032487 regulation of Rap protein signal transdu...       335
## 27 GO:0065009            regulation of molecular function      1629
## 28 GO:0035020 regulation of Rac protein signal transdu...       339
## 29 GO:0007264 small GTPase mediated signal transductio...       553
## 30 GO:0007165                         signal transduction      2959
## 31 GO:0032486             Rap protein signal transduction       340
## 32 GO:0016601             Rac protein signal transduction       351
## 33 GO:0009893    positive regulation of metabolic process      2058
## 34 GO:0007266             Rho protein signal transduction       364
## 35 GO:0051649       establishment of localization in cell      1663
## 36 GO:0044700                   single organism signaling      3130
## 37 GO:0044093 positive regulation of molecular functio...      1093
## 38 GO:0023052                                   signaling      3134
## 39 GO:0016192                  vesicle-mediated transport       875
## 40 GO:0044699                     single-organism process      7268
## 41 GO:0007265             Ras protein signal transduction       497
## 42 GO:0051345 positive regulation of hydrolase activit...       579
## 43 GO:0046907                     intracellular transport      1303
## 44 GO:0032482             Rab protein signal transduction       380
## 45 GO:0051641                       cellular localization      1878
## 46 GO:0010646            regulation of cell communication      1782
## 47 GO:0009966           regulation of signal transduction      1643
## 48 GO:0023051                     regulation of signaling      1770
## 49 GO:0006612               protein targeting to membrane       163
## 50 GO:0000184 nuclear-transcribed mRNA catabolic proce...       114
## 51 GO:0048518 positive regulation of biological proces...      3094
## 52 GO:0051336            regulation of hydrolase activity       757
## 53 GO:1902582     single-organism intracellular transport      1102
## 54 GO:0044765                   single-organism transport      2156
## 55 GO:0050790            regulation of catalytic activity      1362
## 56 GO:0048583          regulation of response to stimulus      2128
## 57 GO:0006897                                 endocytosis       392
## 58 GO:0009405                                pathogenesis       176
## 59 GO:1902578                single-organism localization      2272
## 60 GO:0019080                       viral gene expression       184
## 61 GO:0043085 positive regulation of catalytic activit...       933
## 62 GO:0019083                         viral transcription       175
## 63 GO:0046777                 protein autophosphorylation       154
## 64 GO:0006605                           protein targeting       493
## 65 GO:0008104                        protein localization      1570
## 66 GO:0048522     positive regulation of cellular process      2609
## 67 GO:0044033            multi-organism metabolic process       193
##    Significant Expected classic    p.adj
## 1          541   462.39 7.6e-07 0.001719
## 2           91    57.29 2.4e-06 0.001719
## 3          209   157.43 2.9e-06 0.001719
## 4           77    46.50 2.9e-06 0.001719
## 5          461   390.23 3.2e-06 0.001719
## 6           83    51.31 3.2e-06 0.001719
## 7         1039   971.28 3.3e-06 0.001719
## 8          302   241.69 3.5e-06 0.001719
## 9          450   380.32 3.8e-06 0.001719
## 10         538   465.16 4.3e-06 0.001719
## 11          87    54.96 4.7e-06 0.001719
## 12          77    47.08 4.8e-06 0.001719
## 13         606   532.51 5.9e-06 0.001719
## 14          78    48.54 8.3e-06 0.001719
## 15          78    48.54 8.3e-06 0.001719
## 16          78    48.54 8.3e-06 0.001719
## 17          78    48.54 8.3e-06 0.001719
## 18          78    48.54 8.3e-06 0.001719
## 19          78    48.54 8.3e-06 0.001719
## 20          78    48.54 8.3e-06 0.001719
## 21          78    48.54 8.3e-06 0.001719
## 22         705   631.92 9.0e-06 0.001719
## 23          78    48.69 9.3e-06 0.001719
## 24          78    48.69 9.3e-06 0.001719
## 25          81    51.17 1.0e-05 0.001719
## 26          78    48.83 1.0e-05 0.001719
## 27         294   237.46 1.1e-05 0.001821
## 28          78    49.42 1.6e-05 0.002514
## 29         116    80.61 1.7e-05 0.002514
## 30         498   431.34 1.7e-05 0.002514
## 31          78    49.56 1.8e-05 0.002514
## 32          80    51.17 1.8e-05 0.002514
## 33         359   300.00 2.3e-05 0.003023
## 34          82    53.06 2.3e-05 0.003023
## 35         297   242.42 2.4e-05 0.003064
## 36         522   456.27 2.7e-05 0.003352
## 37         205   159.33 3.1e-05 0.003687
## 38         522   456.85 3.2e-05 0.003687
## 39         169   127.55 3.3e-05 0.003687
## 40        1114  1059.47 3.3e-05 0.003687
## 41         104    72.45 5.2e-05 0.005668
## 42         118    84.40 5.6e-05 0.005959
## 43         237   189.94 5.9e-05 0.006132
## 44          83    55.39 6.6e-05 0.006704
## 45         327   273.76 7.2e-05 0.007092
## 46         312   259.77 7.3e-05 0.007092
## 47         290   239.50 7.8e-05 0.007417
## 48         309   258.02 0.00010 0.009310
## 49          42    23.76 0.00011 0.010032
## 50          32    16.62 0.00013 0.011619
## 51         510   451.02 0.00014 0.012268
## 52         145   110.35 0.00019 0.016329
## 53         201   160.64 0.00020 0.016864
## 54         366   314.29 0.00021 0.017379
## 55         242   198.54 0.00023 0.018689
## 56         360   310.20 0.00033 0.025427
## 57          82    57.14 0.00033 0.025427
## 58          43    25.66 0.00033 0.025427
## 59         381   331.19 0.00042 0.031813
## 60          44    26.82 0.00047 0.035007
## 61         171   136.01 0.00050 0.036631
## 62          42    25.51 0.00058 0.041807
## 63          38    22.45 0.00059 0.041853
## 64          98    71.87 0.00060 0.041897
## 65         271   228.86 0.00065 0.044690
## 66         430   380.32 0.00070 0.047398
## 67          45    28.13 0.00073 0.048692

# showSigOfNodes(monocyte.fit2.up.GO.BP, score(result.fischer.up.GO.BP), firstSigNodes = 10, useInfo = 'all')

monocyte.fit2.up.GO.CC <- new("topGOdata", description="GO annotation Monocyte", ontology="CC", allGenes = monocyte.fit2.up.GO, geneSel = topDiffGenes, nodeSize = 10, annot = annFUN.org, mapping="org.Hs.eg.db", ID = "Ensembl")

## 
## Building most specific GOs ..... ( 1205 GO terms found. )
## 
## Build GO DAG topology .......... ( 1461 GO terms and 2906 relations. )
## 
## Annotating nodes ............... ( 9590 genes annotated to the GO terms. )

result.fischer.up.GO.CC <- runTest(monocyte.fit2.up.GO.CC, algorithm = "classic", statistic = "fisher")

## 
##           -- Classic Algorithm -- 
## 
##       the algorithm is scoring 556 nontrivial nodes
##       parameters: 
##           test statistic:  fisher

padj.temp <- length(result.fischer.up.GO.CC@score)
all.monocyte.up.GO.CC <- GenTable(monocyte.fit2.up.GO.CC, classic = result.fischer.up.GO.CC, topNodes = padj.temp)
all.monocyte.up.GO.CC$p.adj <- p.adjust(all.monocyte.up.GO.CC[,ncol(all.monocyte.up.GO.CC)], 'fdr')
all.monocyte.up.GO.CC[all.monocyte.up.GO.CC$p.adj < 0.05,]

##         GO.ID                              Term Annotated Significant
## 1  GO:0016020                          membrane      4746         777
## 2  GO:0070161                anchoring junction       336          79
## 3  GO:0005912                 adherens junction       331          78
## 4  GO:0005925                    focal adhesion       298          71
## 5  GO:0005924  cell-substrate adherens junction       299          71
## 6  GO:0030055           cell-substrate junction       300          71
## 7  GO:0030054                     cell junction       595         123
## 8  GO:0015629                actin cytoskeleton       266          63
## 9  GO:0005829                           cytosol      2185         372
## 10 GO:0044422                    organelle part      5490         858
## 11 GO:0044446      intracellular organelle part      5417         847
## 12 GO:0031982                           vesicle      2210         373
## 13 GO:0044448                  cell cortex part        65          21
## 14 GO:0022626                cytosolic ribosome        93          27
## 15 GO:0042995                   cell projection       837         155
## 16 GO:0098588    bounding membrane of organelle      1475         256
## 17 GO:0005737                         cytoplasm      6653        1015
## 18 GO:0044437                     vacuolar part       276          60
## 19 GO:0031988          membrane-bounded vesicle      2150         358
## 20 GO:0005774                 vacuolar membrane       239          53
## 21 GO:0044445                    cytosolic part       174          41
## 22 GO:0043005                 neuron projection       401          81
## 23 GO:0044444                  cytoplasmic part      5076         787
## 24 GO:0005773                           vacuole       411          82
## 25 GO:0005938                       cell cortex       125          31
## 26 GO:0005765                lysosomal membrane       209          46
## 27 GO:0030864       cortical actin cytoskeleton        30          11
## 28 GO:0022625 cytosolic large ribosomal subunit        48          15
## 29 GO:0097458                       neuron part       469          90
##    Expected classic     p.adj
## 1    686.41 8.2e-08 0.0000474
## 2     48.60 4.7e-06 0.0009441
## 3     47.87 4.9e-06 0.0009441
## 4     43.10 8.8e-06 0.0010597
## 5     43.24 9.9e-06 0.0010597
## 6     43.39 1.1e-05 0.0010597
## 7     86.05 1.4e-05 0.0011560
## 8     38.47 3.4e-05 0.0024565
## 9    316.02 7.7e-05 0.0049451
## 10   794.02 9.2e-05 0.0053176
## 11   783.46 0.00011 0.0057800
## 12   319.63 0.00016 0.0077067
## 13     9.40 0.00021 0.0086700
## 14    13.45 0.00021 0.0086700
## 15   121.06 0.00041 0.0149600
## 16   213.33 0.00043 0.0149600
## 17   962.22 0.00044 0.0149600
## 18    39.92 0.00061 0.0195878
## 19   310.95 0.00069 0.0209905
## 20    34.57 0.00077 0.0222530
## 21    25.17 0.00085 0.0225945
## 22    58.00 0.00086 0.0225945
## 23   734.14 0.00114 0.0281775
## 24    59.44 0.00117 0.0281775
## 25    18.08 0.00147 0.0339864
## 26    30.23 0.00195 0.0433500
## 27     4.34 0.00215 0.0460259
## 28     6.94 0.00234 0.0474359
## 29    67.83 0.00238 0.0474359

monocyte.fit2.up.GO.MF <- new("topGOdata", description="GO annotation Monocyte", ontology="MF", allGenes = monocyte.fit2.up.GO, geneSel = topDiffGenes, nodeSize = 10, annot = annFUN.org, mapping="org.Hs.eg.db", ID = "Ensembl")

## 
## Building most specific GOs ..... ( 2951 GO terms found. )
## 
## Build GO DAG topology .......... ( 3459 GO terms and 4378 relations. )
## 
## Annotating nodes ............... ( 9325 genes annotated to the GO terms. )

result.fischer.up.GO.MF <- runTest(monocyte.fit2.up.GO.MF, algorithm = "classic", statistic = "fisher")

## 
##           -- Classic Algorithm -- 
## 
##       the algorithm is scoring 743 nontrivial nodes
##       parameters: 
##           test statistic:  fisher

padj.temp <- length(result.fischer.up.GO.MF@score)
all.monocyte.up.GO.MF <- GenTable(monocyte.fit2.up.GO.MF, classic = result.fischer.up.GO.MF, topNodes = padj.temp)
all.monocyte.up.GO.MF$p.adj <- p.adjust(all.monocyte.up.GO.MF[,ncol(all.monocyte.up.GO.MF)], 'fdr')
all.monocyte.up.GO.MF[all.monocyte.up.GO.MF$p.adj < 0.05,]

##        GO.ID                                        Term Annotated
## 1 GO:0019899                              enzyme binding      1094
## 2 GO:0016773 phosphotransferase activity, alcohol gro...       482
## 3 GO:0005515                             protein binding      6494
## 4 GO:0030695                   GTPase regulator activity       199
## 5 GO:0016301                             kinase activity       528
## 6 GO:0016772 transferase activity, transferring phosp...       652
## 7 GO:0005096                   GTPase activator activity       180
## 8 GO:0060589 nucleoside-triphosphatase regulator acti...       215
## 9 GO:0004674    protein serine/threonine kinase activity       321
##   Significant Expected classic     p.adj
## 1         216   160.73 7.5e-07 0.0004788
## 2         109    70.81 1.2e-06 0.0004788
## 3        1026   954.08 2.0e-06 0.0005320
## 4          53    29.24 6.8e-06 0.0011810
## 5         114    77.57 7.4e-06 0.0011810
## 6         134    95.79 1.7e-05 0.0020520
## 7          48    26.45 1.8e-05 0.0020520
## 8          54    31.59 3.4e-05 0.0033915
## 9          70    47.16 0.00032 0.0283733

monocyte.fit2.down.GO <- limmaTopGenes(monocyte.fit2, dir = 'down') 
monocyte.fit2.down.GO.BP <- new("topGOdata", description="GO annotation Monocyte", ontology="BP", allGenes = monocyte.fit2.down.GO, geneSel = topDiffGenes, nodeSize = 10, annot = annFUN.org, mapping="org.Hs.eg.db", ID = "Ensembl")

## 
## Building most specific GOs ..... ( 8350 GO terms found. )
## 
## Build GO DAG topology .......... ( 11948 GO terms and 28640 relations. )
## 
## Annotating nodes ............... ( 9213 genes annotated to the GO terms. )

result.fischer.down.GO.BP <- runTest(monocyte.fit2.down.GO.BP, algorithm = "classic", statistic = "fisher")

## 
##           -- Classic Algorithm -- 
## 
##       the algorithm is scoring 943 nontrivial nodes
##       parameters: 
##           test statistic:  fisher

padj.temp <- length(result.fischer.down.GO.BP@score)
all.monocyte.down.GO.BP <- GenTable(monocyte.fit2.down.GO.BP, classic = result.fischer.down.GO.BP, topNodes = padj.temp)
all.monocyte.down.GO.BP$p.adj <- p.adjust(all.monocyte.down.GO.BP[,ncol(all.monocyte.down.GO.BP)], 'fdr')
all.monocyte.down.GO.BP[all.monocyte.down.GO.BP$p.adj < 0.05,]

## [1] GO.ID       Term        Annotated   Significant Expected    classic    
## [7] p.adj      
## <0 rows> (or 0-length row.names)

monocyte.fit2.down.GO.CC <- new("topGOdata", description="GO annotation Monocyte", ontology="CC", allGenes = monocyte.fit2.down.GO, geneSel = topDiffGenes, nodeSize = 10, annot = annFUN.org, mapping="org.Hs.eg.db", ID = "Ensembl")

## 
## Building most specific GOs ..... ( 1205 GO terms found. )
## 
## Build GO DAG topology .......... ( 1461 GO terms and 2906 relations. )
## 
## Annotating nodes ............... ( 9590 genes annotated to the GO terms. )

result.fischer.down.GO.CC <- runTest(monocyte.fit2.down.GO.CC, algorithm = "classic", statistic = "fisher")

## 
##           -- Classic Algorithm -- 
## 
##       the algorithm is scoring 145 nontrivial nodes
##       parameters: 
##           test statistic:  fisher

padj.temp <- length(result.fischer.down.GO.CC@score)
all.monocyte.down.GO.CC <- GenTable(monocyte.fit2.down.GO.CC, classic = result.fischer.down.GO.CC, topNodes = padj.temp)
all.monocyte.down.GO.CC$p.adj <- p.adjust(all.monocyte.down.GO.CC[,ncol(all.monocyte.down.GO.CC)], 'fdr')
all.monocyte.down.GO.CC[all.monocyte.down.GO.CC$p.adj < 0.05,]

## [1] GO.ID       Term        Annotated   Significant Expected    classic    
## [7] p.adj      
## <0 rows> (or 0-length row.names)

monocyte.fit2.down.GO.MF <- new("topGOdata", description="GO annotation Monocyte", ontology="MF", allGenes = monocyte.fit2.down.GO, geneSel = topDiffGenes, nodeSize = 10, annot = annFUN.org, mapping="org.Hs.eg.db", ID = "Ensembl")

## 
## Building most specific GOs ..... ( 2951 GO terms found. )
## 
## Build GO DAG topology .......... ( 3459 GO terms and 4378 relations. )
## 
## Annotating nodes ............... ( 9325 genes annotated to the GO terms. )

result.fischer.down.GO.MF <- runTest(monocyte.fit2.down.GO.MF, algorithm = "classic", statistic = "fisher")

## 
##           -- Classic Algorithm -- 
## 
##       the algorithm is scoring 117 nontrivial nodes
##       parameters: 
##           test statistic:  fisher

padj.temp <- length(result.fischer.down.GO.MF@score)
all.monocyte.down.GO.MF <- GenTable(monocyte.fit2.down.GO.MF, classic = result.fischer.down.GO.MF, topNodes = padj.temp)
all.monocyte.down.GO.MF$p.adj <- p.adjust(all.monocyte.down.GO.MF[,ncol(all.monocyte.down.GO.MF)], 'fdr')
all.monocyte.down.GO.MF[all.monocyte.down.GO.MF$p.adj < 0.05,]

## [1] GO.ID       Term        Annotated   Significant Expected    classic    
## [7] p.adj      
## <0 rows> (or 0-length row.names)

Fulani_Mossi

Thomas Källman, BILS

13 Aug 2015

Introduction

Differential gene expression