suppressMessages(library(limma))
suppressMessages(library(cowplot))
suppressMessages(library(data.table))
suppressMessages(library(reshape2))
suppressMessages(library(clusterProfiler))
suppressMessages(library(org.Hs.eg.db))
cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73",
"#F0E442", "#0072B2", "#D55E00", "#CC79A7")
Utility Functions
plotIntensitiesForCohort <- function(object, title, foreground=TRUE, takeLog=TRUE){
disease <- grepl(".*CAC.*", colnames(object))
control <- grepl(".*HCpool*", colnames(object))
values.disease <- object[, disease]
values.control <- object[, control]
if (foreground){
log2RowMeans.disease <- log2(rowMeans(as.data.frame(values.disease$E, row.names=0)))
log2RowMeans.control <- log2(rowMeans(as.data.frame(values.control$E, row.names=0)))
}
else{
log2RowMeans.disease <- log2(rowMeans(as.data.frame(values.disease$Eb, row.names=0)))
log2RowMeans.control <- log2(rowMeans(as.data.frame(values.control$Eb, row.names=0)))
}
k <- list(log2RowMeans.disease, log2RowMeans.control)
grp <- c(rep("Cancer", length(disease)),
rep("Control", length(control)))
df <- data.frame(x=unlist(k), grp=grp)
if (takeLog){
g = ggplot(df,aes(x = grp, y = log2(x) )) +
geom_boxplot(aes(fill=factor(grp))) +
scale_fill_manual(breaks = c("Cancer", "Control"),
labels = c("Cancer", "Control"),
values = cbPalette[6:7]) +
xlab("Cohort") +
ylab("log2 foreground intensities") +
ggtitle(title)
return (g)
}
if (takeLog == FALSE){
g = ggplot(df,aes(x = grp, y = x)) +
geom_boxplot(aes(fill=factor(grp))) +
scale_fill_manual(breaks = c("Cancer", "Control"),
labels = c("Cancer", "Control"),
values = cbPalette[6:7]) +
xlab("Cohort") +
ylab("log2 foreground intensities") +
ggtitle(title)
return (g)
}
}
log2TransformIntensities <- function(E)
{
return (log2(rowMeans(as.data.frame(E, row.names=0))))
}
'%nin%' <- Negate('%in%')
palette12 <- c('#a6cee3',
'#1f78b4','#b2df8a',
'#33a02c', '#fb9a99','#e31a1c',
'#fdbf6f', '#ff7f00', '#cab2d6', '#6a3d9a', '#ffff99', '#b15928')
plotIntensitiesForCohortByProbe <- function(object, title){
disease <- grepl(".*CAC.*", colnames(object))
control <- grepl(".*HCpool*", colnames(object))
disease <- object[, disease]
control <- object[, control]
controlProbes <- object$genes$ID %in% c('Control')
bufferProbes <- object$genes$Name %in% c('buffer', 'Buffer')
## Some control probes seem to be labelled as 'Buffer' in name column
## so we will treat them as buffer only
controlProbes <- setdiff(controlProbes, bufferProbes)
otherProbes <- object$genes$ID %nin% c('Control') & object$genes$Name %nin% c('buffer', 'Buffer')
disease.E <- disease$E
control.E <- control$E
disease.Eb <- disease$Eb
control.Eb <- control$Eb
disease.E.control <- disease.E[controlProbes,]
control.E.control <- control.E[controlProbes,]
disease.E.buffer <- disease.E[bufferProbes,]
control.E.buffer <- control.E[bufferProbes,]
disease.E.others <- disease.E[controlProbes,]
control.E.others <- control.E[controlProbes,]
disease.Eb.control <- disease.Eb[controlProbes,]
control.Eb.control <- control.Eb[controlProbes,]
disease.Eb.buffer <- disease.Eb[bufferProbes,]
control.Eb.buffer <- control.Eb[bufferProbes,]
disease.Eb.others <- disease.Eb[controlProbes,]
control.Eb.others <- control.Eb[controlProbes,]
k <- list(log2TransformIntensities(disease.E.control),
log2TransformIntensities(control.E.control),
log2TransformIntensities(disease.E.buffer),
log2TransformIntensities(control.E.buffer),
#log2TransformIntensities(disease.E.others),
#log2TransformIntensities(control.E.others),
log2TransformIntensities(disease.Eb.control),
log2TransformIntensities(control.Eb.control),
log2TransformIntensities(disease.Eb.buffer),
log2TransformIntensities(control.Eb.buffer)#,
#log2TransformIntensities(disease.Eb.others),
#log2TransformIntensities(control.Eb.others)
)
grp <- c(rep("Disease-Foreground-ControlProbes", length(disease)),
rep("Control-Foreground-ControlProbes", length(control)),
rep("Disease-Foreground-BufferProbes", length(disease)),
rep("Control-Foreground-BufferProbes", length(control)),
#rep("Disease-Foreground-OtherProbes", length(disease)),
#rep("Control-Foreground-OtherProbes", length(control)),
rep("Disease-Background-ControlProbes", length(disease)),
rep("Control-Background-ControlProbes", length(control)),
rep("Disease-Background-BufferProbes", length(disease)),
rep("Control-Background-BufferProbes", length(control))#,
#rep("Disease-Background-OtherProbes", length(disease)),
#rep("Control-Background-OtherProbes", length(control))
)
df <- data.frame(x=unlist(k), grp=grp)
g = ggplot(df,aes(x = grp, y = log2(x) )) +
geom_boxplot(aes(fill=factor(grp))) +
scale_fill_manual(breaks = c("Disease-Foreground-ControlProbes",
"Control-Foreground-ControlProbes",
"Disease-Foreground-BufferProbes",
"Control-Foreground-BufferProbes",
#"Disease-Foreground-OtherProbes",
#"Control-Foreground-OtherProbes",
"Disease-Background-ControlProbes",
"Control-Background-ControlProbes",
"Disease-Background-BufferProbes",
"Control-Background-BufferProbes"),
#"Disease-Background-OtherProbes",
#"Control-Background-OtherProbes"),
labels =c("Disease-Foreground-ControlProbes",
"Control-Foreground-ControlProbes",
"Disease-Foreground-BufferProbes",
"Control-Foreground-BufferProbes",
#"Disease-Foreground-OtherProbes",
#"Control-Foreground-OtherProbes",
"Disease-Background-ControlProbes",
"Control-Background-ControlProbes",
"Disease-Background-BufferProbes",
"Control-Background-BufferProbes"),
#"Disease-Background-OtherProbes",
#"Control-Background-OtherProbes"),
values = palette12) +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
xlab("Cohort") +
ylab("log2 foreground intensities") +
ggtitle(title)
return (g)
}
plotForegroundIntensities <- function(object, title){
neg <- as.data.frame(object$E)
g = ggplot(data = melt(neg), aes(x=variable, y=log2(value)) ) + geom_boxplot(aes(fill=variable)) +xlab("Samples 1-85") + ylab("log2 foreground") + ggtitle(title)
ggsave(filename=paste(outputDirectory,title,".png", sep=""), plot=g)
}
plotControls <- function(object, title){
neg <- as.data.frame(object$E)
g = ggplot(data = melt(neg), aes(x=variable, y=log2(value)) ) + geom_boxplot(aes(fill=variable)) +xlab("Samples 1-85") + ylab("log2 foreground") + ggtitle(title)
ggsave(filename=paste(outputDirectory,title,".png", sep=""), plot=g)
}
get_entrez <- function(x){
bitr(x, fromType = 'SYMBOL', toType = 'ENTREZID', OrgDb = org.Hs.eg.db)$ENTREZID
}
get_name <- function(x){
bitr(x, fromType = 'ENTREZID', toType = 'SYMBOL', OrgDb = org.Hs.eg.db)$SYMBOL
}
writeEKG <- function(ekg, prefix){
ekg <- as.data.frame(ekg)
ekg.geneID <- strsplit(as.character(ekg$geneID), '/', fixed=TRUE)
ekg.genes <- lapply(ekg.geneID, get_name)
ekg$geneID <- ekg.genes
ekg$geneID <- vapply(ekg$geneID , paste, collapse = ", ", character(1L))
write.table(ekg, file.path('..','results', paste(prefix, 'tsv', sep='.')))
}
writeego <- function(ego, prefix){
ego <- as.data.frame(ego)
ego.geneID <- strsplit(as.character(ego$geneID), '/', fixed=TRUE)
ego.genes <- lapply(ego.geneID, get_name)
ego$geneID <- ego.genes
ego$geneID <- vapply(ego$geneID , paste, collapse = ", ", character(1L))
write.table(ego, file.path('..','results', paste(prefix, 'tsv', sep='.')))
}
Targets
createShortName <- function (longFileName){
splitted <- strsplit(longFileName, split = '_')
return (paste(splitted[3], splitted[4], sep = '_'))
}
limmaCy5 <- 'F635 Median'
limmaCy5b <- 'B635 Median'
targetFile <- file.path('..', 'design_file.tsv')
targetDir <- file.path('..', 'data_new')
targets <- fread(targetFile)
targets$shortName <- lapply(targets$FileName, createShortName)
RG <- read.maimages( targets$FileName,
source="genepix.custom",
green.only=TRUE,
path = targetDir,
columns=list(G=limmaCy5, Gb=limmaCy5b),
verbose = FALSE)
RG$E <- RG$E[with(RG$genes, order(Block, Column, Row)), ]
RG$genes <- RG$genes[with(RG$genes, order(Block, Column, Row)), ]
Foreground Intensity Distribution
plotIntensitiesForCohort(RG, 'Foreground intensities')

Background Intensity Distribution
plotIntensitiesForCohort(RG, 'Background intensities', FALSE)

Are Controls/Buffer spots different from each other or the regular probes?
plotIntensitiesForCohortByProbe(RG, 'Probewise intensities')

Background correction and normalization
controlProbes <- RG$genes$ID %in% c('Control')
bufferProbes <- RG$genes$Name %in% c('buffer', 'Buffer')
## Some control probes seem to be labelled as 'Buffer' in name column
## so we will treat them as buffer only
controlProbes <- setdiff(controlProbes, bufferProbes)
otherProbes <- RG$genes$ID %nin% c('Control') & RG$genes$Name %nin% c('buffer', 'Buffer')
status <- rep(NULL, length(RG$genes$ID))
status[controlProbes] <- 'Control'
status[bufferProbes] <- 'Buffer'
status[otherProbes] <- 'Regular'
## Neqc normalization using buffer genes as control
RG.neqc <- neqc(RG, status=status, negctrl = 'Buffer', regular='Regular', offset=50, robust = TRUE)
Correlation between duplicate spots
f <- factor(paste(targets$Condition, targets$Sex, sep="."))
design <- model.matrix(~0+f)
colnames(design) <- levels(f)
corfit <- duplicateCorrelation(RG.neqc, design, ndups=2)#, spacing = 1)
corfit$consensus.correlation
[1] 0.8019561
boxplot(tanh(corfit$atanh.correlations))

Calling DE genes
cont.matrix <- makeContrasts(CancervsHealthy=((cancer.female + cancer.male)/2-healthy.pooled),
levels=design)
fit <- lmFit(RG.neqc,
design, ndups=2,
correlation=corfit$consensus)
fit <- contrasts.fit(fit, cont.matrix)
fit <- eBayes(fit)
top <- topTable(fit, number = length(RG.neqc$genes$ID))
top.sig <- subset(top, adj.P.Val < 0.05)
write.csv(top, '../results/cancer_vs_healthy_neqc.all.csv', row.names = F)
write.csv(top.sig, '../results/cancer_vs_healthy_neqc.significant.csv', row.names = F)
GO enrichment of significant DE genes
genes <- top.sig$Name
genes.entrez <- get_entrez(genes)
ekg <- enrichGO(gene = genes.entrez,
OrgDb = org.Hs.eg.db,
pAdjustMethod = 'BH',
pvalueCutoff = 0.2,
qvalueCutoff = 0.2)
barplot(ekg)

writeego(ekg, 'cancer_vs_healthy_neqc.significant.GO')
KEGG enrichment of significant DE genes
genes <- top.sig$Name
genes.entrez <- get_entrez(genes)
ekg <- enrichKEGG(gene = genes.entrez,
organism = 'hsa',
pAdjustMethod = 'BH',
pvalueCutoff = 0.2,
qvalueCutoff = 0.2)
barplot(ekg, showCategory = 15)

writeEKG(ekg, 'cancer_vs_healthy_neqc.significant.KEGG')
---
title: "Analysis of Colon Cancer Microarray Datasets"
output: html_notebook
author: Saket Choudhary <saketkc@gmail.com>
date: 12/11/2017
---

```{r}
suppressMessages(library(limma))
suppressMessages(library(cowplot))
suppressMessages(library(data.table))
suppressMessages(library(reshape2))
suppressMessages(library(clusterProfiler))
suppressMessages(library(org.Hs.eg.db))

cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73", 
               "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

```

# Utility Functions
```{r}
plotIntensitiesForCohort <- function(object, title, foreground=TRUE, takeLog=TRUE){
    disease <- grepl(".*CAC.*", colnames(object)) 
    control <- grepl(".*HCpool*", colnames(object)) 
    
    values.disease <- object[, disease]
    values.control <- object[, control]
    
    if (foreground){
        log2RowMeans.disease <- log2(rowMeans(as.data.frame(values.disease$E, row.names=0)))
        log2RowMeans.control <- log2(rowMeans(as.data.frame(values.control$E, row.names=0)))
  
    }
    else{
        log2RowMeans.disease <- log2(rowMeans(as.data.frame(values.disease$Eb, row.names=0)))
        log2RowMeans.control <- log2(rowMeans(as.data.frame(values.control$Eb, row.names=0)))
    }
    
    k <- list(log2RowMeans.disease, log2RowMeans.control)
  
    grp <- c(rep("Cancer", length(disease)),
             rep("Control", length(control)))
  
    df <- data.frame(x=unlist(k), grp=grp)
    if (takeLog){
        g = ggplot(df,aes(x = grp, y = log2(x) )) +
            geom_boxplot(aes(fill=factor(grp))) + 
            scale_fill_manual(breaks = c("Cancer", "Control"),
                                labels = c("Cancer", "Control"),                            
                                values = cbPalette[6:7]) + 
            xlab("Cohort") + 
            ylab("log2 foreground intensities") + 
            ggtitle(title)
        return (g)
        }
    if (takeLog == FALSE){
        g = ggplot(df,aes(x = grp, y = x)) + 
            geom_boxplot(aes(fill=factor(grp))) +
            scale_fill_manual(breaks = c("Cancer", "Control"),
                                labels = c("Cancer", "Control"),
                                values = cbPalette[6:7]) + 
            xlab("Cohort") +
            ylab("log2 foreground intensities") +
            ggtitle(title)
        return (g)
      }
}
log2TransformIntensities <- function(E)
{
  return (log2(rowMeans(as.data.frame(E, row.names=0)))) 
}

'%nin%' <- Negate('%in%')
palette12 <- c('#a6cee3', 
               '#1f78b4','#b2df8a',
               '#33a02c', '#fb9a99','#e31a1c', 
               '#fdbf6f', '#ff7f00', '#cab2d6', '#6a3d9a', '#ffff99', '#b15928')

plotIntensitiesForCohortByProbe <- function(object, title){
  disease <- grepl(".*CAC.*", colnames(object)) 
  control <- grepl(".*HCpool*", colnames(object)) 
  
  disease <- object[, disease]
  control <- object[, control]
  
  controlProbes <- object$genes$ID %in% c('Control')
  bufferProbes <- object$genes$Name %in% c('buffer', 'Buffer')
  
  
  ## Some control probes seem to be labelled as 'Buffer' in name column
  ## so we will treat them as buffer only
  
  controlProbes <- setdiff(controlProbes, bufferProbes)
  otherProbes <-  object$genes$ID %nin% c('Control') & object$genes$Name %nin% c('buffer', 'Buffer')
  
  
  disease.E <- disease$E
  control.E <- control$E
  
  disease.Eb <- disease$Eb
  control.Eb <- control$Eb
  
  disease.E.control <- disease.E[controlProbes,]
  control.E.control <- control.E[controlProbes,]
  disease.E.buffer <- disease.E[bufferProbes,]
  control.E.buffer <- control.E[bufferProbes,]
  disease.E.others <- disease.E[controlProbes,]
  control.E.others <- control.E[controlProbes,]
  
  
  
  disease.Eb.control <- disease.Eb[controlProbes,]
  control.Eb.control <- control.Eb[controlProbes,]
  disease.Eb.buffer <- disease.Eb[bufferProbes,]
  control.Eb.buffer <- control.Eb[bufferProbes,]
  disease.Eb.others <- disease.Eb[controlProbes,]
  control.Eb.others <- control.Eb[controlProbes,]
  
  
  k <- list(log2TransformIntensities(disease.E.control),
            log2TransformIntensities(control.E.control),
            log2TransformIntensities(disease.E.buffer),
            log2TransformIntensities(control.E.buffer),
            #log2TransformIntensities(disease.E.others),
            #log2TransformIntensities(control.E.others),
            log2TransformIntensities(disease.Eb.control),
            log2TransformIntensities(control.Eb.control),
            log2TransformIntensities(disease.Eb.buffer),
            log2TransformIntensities(control.Eb.buffer)#,
            #log2TransformIntensities(disease.Eb.others),
            #log2TransformIntensities(control.Eb.others)
            )
  
  grp <- c(rep("Disease-Foreground-ControlProbes", length(disease)),
           rep("Control-Foreground-ControlProbes", length(control)),
           rep("Disease-Foreground-BufferProbes", length(disease)),
           rep("Control-Foreground-BufferProbes", length(control)),
           #rep("Disease-Foreground-OtherProbes", length(disease)),
           #rep("Control-Foreground-OtherProbes", length(control)),
           
           rep("Disease-Background-ControlProbes", length(disease)),
           rep("Control-Background-ControlProbes", length(control)),
           rep("Disease-Background-BufferProbes", length(disease)),
           rep("Control-Background-BufferProbes", length(control))#,
           #rep("Disease-Background-OtherProbes", length(disease)),
           #rep("Control-Background-OtherProbes", length(control))
           )
  
  df <- data.frame(x=unlist(k), grp=grp)
  g = ggplot(df,aes(x = grp, y = log2(x) )) +
    geom_boxplot(aes(fill=factor(grp))) + 
    scale_fill_manual(breaks = c("Disease-Foreground-ControlProbes",
                                 "Control-Foreground-ControlProbes",
                                 "Disease-Foreground-BufferProbes",
                                 "Control-Foreground-BufferProbes",
                                 #"Disease-Foreground-OtherProbes",
                                 #"Control-Foreground-OtherProbes",
                                 "Disease-Background-ControlProbes",
                                 "Control-Background-ControlProbes",
                                 "Disease-Background-BufferProbes",
                                 "Control-Background-BufferProbes"),
                                 #"Disease-Background-OtherProbes",
                                 #"Control-Background-OtherProbes"),
                      labels =c("Disease-Foreground-ControlProbes",
                                 "Control-Foreground-ControlProbes",
                                 "Disease-Foreground-BufferProbes",
                                 "Control-Foreground-BufferProbes",
                                 #"Disease-Foreground-OtherProbes",
                                 #"Control-Foreground-OtherProbes",
                                 "Disease-Background-ControlProbes",
                                 "Control-Background-ControlProbes",
                                 "Disease-Background-BufferProbes",
                                 "Control-Background-BufferProbes"),
                                 #"Disease-Background-OtherProbes",
                                 #"Control-Background-OtherProbes"),
                      values = palette12) + 
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    xlab("Cohort") + 
    ylab("log2 foreground intensities") + 
    ggtitle(title)
  return (g)
  
}


plotForegroundIntensities <- function(object, title){
  neg <- as.data.frame(object$E)
  
  g = ggplot(data = melt(neg), aes(x=variable, y=log2(value)) ) + geom_boxplot(aes(fill=variable)) +xlab("Samples 1-85") + ylab("log2 foreground") + ggtitle(title)
  ggsave(filename=paste(outputDirectory,title,".png", sep=""), plot=g)
}

plotControls <- function(object, title){
  neg <- as.data.frame(object$E)
  
  g = ggplot(data = melt(neg), aes(x=variable, y=log2(value)) ) + geom_boxplot(aes(fill=variable)) +xlab("Samples 1-85") + ylab("log2 foreground") + ggtitle(title)
  ggsave(filename=paste(outputDirectory,title,".png", sep=""), plot=g)
}
```


```{r}
get_entrez <- function(x){
   bitr(x, fromType = 'SYMBOL', toType = 'ENTREZID', OrgDb = org.Hs.eg.db)$ENTREZID
}

get_name <- function(x){
   bitr(x, fromType = 'ENTREZID', toType = 'SYMBOL', OrgDb = org.Hs.eg.db)$SYMBOL
}

writeEKG <- function(ekg, prefix){
  ekg <- as.data.frame(ekg)
  ekg.geneID <- strsplit(as.character(ekg$geneID), '/', fixed=TRUE)
  ekg.genes <- lapply(ekg.geneID, get_name)
  ekg$geneID <- ekg.genes
  ekg$geneID <- vapply(ekg$geneID , paste, collapse = ", ", character(1L))
  write.table(ekg, file.path('..','results', paste(prefix, 'tsv', sep='.')))
}



writeego <- function(ego, prefix){
  ego <- as.data.frame(ego)
  ego.geneID <- strsplit(as.character(ego$geneID), '/', fixed=TRUE)
  ego.genes <- lapply(ego.geneID, get_name)
  ego$geneID <- ego.genes
  ego$geneID <- vapply(ego$geneID , paste, collapse = ", ", character(1L))
  write.table(ego, file.path('..','results', paste(prefix, 'tsv', sep='.')))
}
```

# Targets
```{r}
createShortName <- function  (longFileName){
    splitted <- strsplit(longFileName, split = '_')
    return (paste(splitted[3], splitted[4], sep = '_'))
}
limmaCy5 <- 'F635 Median'
limmaCy5b <- 'B635 Median'
targetFile <- file.path('..', 'design_file.tsv')
targetDir <- file.path('..', 'data_new')

targets <- fread(targetFile)
targets$shortName <- lapply(targets$FileName, createShortName)
RG <- read.maimages( targets$FileName,
                     source="genepix.custom", 
                     green.only=TRUE,
                     path = targetDir,
                     columns=list(G=limmaCy5, Gb=limmaCy5b),
                     verbose = FALSE)
RG$E <- RG$E[with(RG$genes, order(Block, Column, Row)), ]
RG$genes <- RG$genes[with(RG$genes, order(Block, Column, Row)), ]

```

# Foreground Intensity Distribution

```{r, fig.width=15, fig.height=15}
plotIntensitiesForCohort(RG, 'Foreground intensities')
```

# Background Intensity Distribution

```{r, fig.width=15, fig.height=15}
plotIntensitiesForCohort(RG, 'Background intensities', FALSE)
```

# Are Controls/Buffer spots different from each other or the regular probes?

```{r, fig.width=15, fig.height=15}
plotIntensitiesForCohortByProbe(RG, 'Probewise  intensities')
```


```{r, eval=FALSE, echo=FALSE, warning=FALSE, error=FALSE, message=FALSE}
RG <- backgroundCorrect(RG, method="normexp", offset=50)
RG$E <- normalizeBetweenArrays(RG$E, method="quantile")
RG$E <- log2(RG$E)
```

# Background correction and normalization

```{r, warning=FALSE, error=FALSE, message=FALSE}
controlProbes <- RG$genes$ID %in% c('Control')
bufferProbes <- RG$genes$Name %in% c('buffer', 'Buffer')


## Some control probes seem to be labelled as 'Buffer' in name column
## so we will treat them as buffer only

controlProbes <- setdiff(controlProbes, bufferProbes)
otherProbes <-  RG$genes$ID %nin% c('Control') & RG$genes$Name %nin% c('buffer', 'Buffer')

status <- rep(NULL, length(RG$genes$ID))
status[controlProbes] <- 'Control'
status[bufferProbes] <- 'Buffer'
status[otherProbes] <- 'Regular'


## Neqc normalization using buffer genes as control
RG.neqc <- neqc(RG, status=status, negctrl = 'Buffer', regular='Regular', offset=50, robust = TRUE)



```

# Correlation between duplicate spots

```{r, warning=FALSE, error=FALSE, message=FALSE}
f <- factor(paste(targets$Condition, targets$Sex, sep="."))
design <- model.matrix(~0+f)
colnames(design) <- levels(f)
corfit <- duplicateCorrelation(RG.neqc, design, ndups=2)#, spacing = 1)
corfit$consensus.correlation
boxplot(tanh(corfit$atanh.correlations))

```


# Calling DE genes




```{r, warning=FALSE, error=FALSE, message=FALSE}
cont.matrix <- makeContrasts(CancervsHealthy=((cancer.female + cancer.male)/2-healthy.pooled),
                             levels=design)
fit <- lmFit(RG.neqc, 
             design, ndups=2,
             correlation=corfit$consensus)
fit <- contrasts.fit(fit, cont.matrix)
fit <- eBayes(fit)
top <- topTable(fit, number = length(RG.neqc$genes$ID))

top.sig <- subset(top, adj.P.Val < 0.05)
write.csv(top, '../results/cancer_vs_healthy_neqc.all.csv', row.names = F)
write.csv(top.sig, '../results/cancer_vs_healthy_neqc.significant.csv', row.names = F)
```


# GO enrichment of significant DE genes

```{r, warning=FALSE, error=FALSE, message=FALSE, fig.width=15, fig.height=15}

genes <- top.sig$Name
genes.entrez <- get_entrez(genes)
ekg <- enrichGO(gene = genes.entrez,
                OrgDb = org.Hs.eg.db,
                pAdjustMethod = 'BH',
                pvalueCutoff  = 0.2,
                qvalueCutoff  = 0.2)
barplot(ekg)

writeego(ekg, 'cancer_vs_healthy_neqc.significant.GO')
```
# KEGG enrichment of significant DE genes

```{r, warning=FALSE, error=FALSE, message=FALSE, fig.width=15, fig.height=15}
genes <- top.sig$Name
genes.entrez <- get_entrez(genes)
ekg <- enrichKEGG(gene = genes.entrez,
                    organism = 'hsa',
                    pAdjustMethod = 'BH',
                    pvalueCutoff  = 0.2,
                    qvalueCutoff  = 0.2)
barplot(ekg, showCategory = 15)
writeEKG(ekg, 'cancer_vs_healthy_neqc.significant.KEGG')
```
