library(limma)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
setwd("~/Projects/2014-03-05_GlucoseTreatedIslets/UedanMicroarray/")



# Read normalized array data from SCIBLU
array <- read.csv("p0931_RMA_Data_Complete_Annotations.csv")


# Split array data into intensities and gene annotations
array.data <- array[,18:44]
array.samples <- array[,1:17]


# Create meta data frame from sample names
individ<-sapply(strsplit(colnames(array.data), "\\."), `[`, 2)
treat<-sapply(strsplit(colnames(array.data), "\\."), `[`, 3)
meta<-data.frame(colnames(array.data),individ,as.numeric(treat),treat2=ifelse(as.numeric(treat) > 8,"High","Low"))
colnames(meta)[3]<-"treat"

meta
##                                      colnames.array.data.  individ treat
## 1         X10_individual.32.7.3mM.rma.gene.default.Signal       32     7
## 2        X11_individual.32.14.3mM.rma.gene.default.Signal       32    14
## 3         X12_individual.33.3.3mM.rma.gene.default.Signal       33     3
## 4         X13_individual.33.7.3mM.rma.gene.default.Signal       33     7
## 5        X14_individual.33.14.3mM.rma.gene.default.Signal       33    14
## 6         X15_individual.34.3.3mM.rma.gene.default.Signal       34     3
## 7         X16_individual.34.7.3mM.rma.gene.default.Signal       34     7
## 8        X17_individual.34.14.3mM.rma.gene.default.Signal       34    14
## 9         X18_individual.35.3.3mM.rma.gene.default.Signal       35     3
## 10        X19_individual.35.7.3mM.rma.gene.default.Signal       35     7
## 11         X1_individual.41.3.3mM.rma.gene.default.Signal       41     3
## 12       X20_individual.35.14.3mM.rma.gene.default.Signal       35    14
## 13        X21_individual.36.3.3mM.rma.gene.default.Signal       36     3
## 14        X22_individual.36.7.3mM.rma.gene.default.Signal       36     7
## 15       X23_individual.36.14.3mM.rma.gene.default.Signal       36    14
## 16        X24_individual.37.3.3mM.rma.gene.default.Signal       37     3
## 17        X25_individual.37.7.3mM.rma.gene.default.Signal       37     7
## 18       X26_individual.37.14.3mM.rma.gene.default.Signal       37    14
## 19  X27_individual.diabetic.3.3mM.rma.gene.default.Signal diabetic     3
## 20  X28_individual.diabetic.7.3mM.rma.gene.default.Signal diabetic     7
## 21 X29_individual.diabetic.14.3mM.rma.gene.default.Signal diabetic    14
## 22         X2_individual.41.7.3mM.rma.gene.default.Signal       41     7
## 23        X3_individual.41.14.3mM.rma.gene.default.Signal       41    14
## 24         X6_individual.30.3.3mM.rma.gene.default.Signal       30     3
## 25         X7_individual.30.7.3mM.rma.gene.default.Signal       30     7
## 26        X8_individual.30.14.3mM.rma.gene.default.Signal       30    14
## 27         X9_individual.32.3.3mM.rma.gene.default.Signal       32     3
##    treat2
## 1     Low
## 2    High
## 3     Low
## 4     Low
## 5    High
## 6     Low
## 7     Low
## 8    High
## 9     Low
## 10    Low
## 11    Low
## 12   High
## 13    Low
## 14    Low
## 15   High
## 16    Low
## 17    Low
## 18   High
## 19    Low
## 20    Low
## 21   High
## 22    Low
## 23   High
## 24    Low
## 25    Low
## 26   High
## 27    Low
# MDS to visualize overall data structure
#png("MDS.png")
plotMDS(array.data,col=as.numeric(individ),labels=paste("Samp",individ,"_",treat,"mM",sep=""))
## Warning: NAs introduced by coercion

plot of chunk unnamed-chunk-1

#dev.off()


# inference

# create design matrix with patient and treatment (3+7 vs 14) as contrasts
design <- model.matrix(~factor(individ)+treat2,meta)


# limma stuff
fit <- lmFit(array.data, design)
fit <- eBayes(fit)
tt1<-topTable(fit, coef="treat2Low", n=40000000, adjust="BH")
tt1<-data.frame(Gene.Symbol=array.samples[rownames(tt1),"Gene.Symbol"],tt1)

# Clean gene symbol
tt1$Gene.Symbol <- as.character(tt1$Gene.Symbol)
tt1$Gene.Symbol <- gsub(" ","",tt1$Gene.Symbol)

options(stringsAsFactors = F)
rnaseq <- read.csv("1_AllSamples.limma.csv")


# merge with rna-seq
rna.array <- merge(rnaseq,tt1,by.x="hgnc_symbol",by.y="Gene.Symbol",all.x=T,all.y=F,suffixes = c("rnaseq","array"))


rna.array.filt <- filter(rna.array,P.Valuearray<.05)
qplot(rna.array.filt$logFCarray,rna.array.filt$logFCrnaseq)+geom_abline()

plot of chunk unnamed-chunk-1

#write file
write.csv(file="GTI.limma.RNAseq_array.csv",rna.array)