library(reshape2)
library(ggplot2)
library(limma)
library(knitr)
# Load the sample sheet. This script reads its Condition column (below) and
# its FileName column (when the array images are read).
targets <- read.csv(targetFile, header = TRUE)

# One factor level per condition, in order of first appearance in the sheet.
f <- factor(targets$Condition, levels = unique(targets$Condition))
#f<-paste(targets$Condition, targets$Sex, sep=".")
#f<-factor(f,levels=c("Control.Male","Control.Female","MGGrade1.Male", "MGGrade1.Female", "MGGrade2.Male", "MGGrade2.Female"))

# The contrasts below refer to these three levels by name; fail early with a
# readable message instead of a cryptic makeContrasts() error.
requiredConditions <- c("Control", "MGGrade1", "MGGrade2")
missingConditions <- setdiff(requiredConditions, levels(f))
if (length(missingConditions) > 0) {
  stop("targets$Condition is missing level(s) required by the contrasts: ",
       paste(missingConditions, collapse = ", "),
       call. = FALSE)
}

# Cell-means design (no intercept): one column per condition, named after
# the factor levels so the contrasts can reference them directly.
design <- model.matrix(~0+f)
colnames(design) <- levels(f)

# Each contrast is "disease minus control", so a positive coefficient means
# a higher value in the MG group. The third contrast compares the average
# of both MG grades against the controls.
cont.matrix <- makeContrasts(
  HCvsMG1_noSexCorrected = (MGGrade1) - (Control),
  HCvsMG2_noSexCorrected = (MGGrade2) - (Control),
  HCvsMG_noSexCorrected = MGGrade2/2 + MGGrade1/2 - Control,
  levels = design
)
## Read the arrays as single-channel data: limma's G / Gb slots are filled
## from the columns named by limmaCy5 / limmaCy5b (i.e. G,Gb are treated as
## Cy5,Cy5b, as explained in the beginning).
channelColumns <- list(G = limmaCy5, Gb = limmaCy5b)
RG <- read.maimages(
  files = targets$FileName,
  source = "genepix.custom",
  columns = channelColumns,
  green.only = TRUE
)
## Custom background: LocalFeature
## Read Control_H-02_2000153953.gpr
## Custom background: LocalFeature
## Read Control_H-03_2000153943.gpr
## Custom background: LocalFeature
## Read Control_H-19_2000154008.gpr
## Custom background: LocalFeature
## Read Control_H-23_2000154009.gpr
## Custom background: LocalFeature
## Read Control_H-25_2000153952.gpr
## Custom background: LocalFeature
## Read Control_H-35_2000153942.gpr
## Custom background: LocalFeature
## Read Control_H-41_2000154026.gpr
## Custom background: LocalFeature
## Read Control_H-58_2000153935.gpr
## Custom background: LocalFeature
## Read Control_H-59_2000154016.gpr
## Custom background: LocalFeature
## Read Control_HC-25_2000153932.gpr
## Custom background: LocalFeature
## Read Control_HV-56_2000155735.gpr
## Custom background: LocalFeature
## Read Control_HV-59_2000155740.gpr
## Custom background: LocalFeature
## Read Control_HV-64_2000144456.gpr
## Custom background: LocalFeature
## Read Control_HV-70_2000144457.gpr
## Custom background: LocalFeature
## Read Control_HV-71_2000144458.gpr
## Custom background: LocalFeature
## Read MG_Grade_II_CH_17967_2000153914.gpr
## Custom background: LocalFeature
## Read MG_Grade_II_CJ_15491_2000153803.gpr
## Custom background: LocalFeature
## Read MG_Grade_II_CJ_15753_2000153802.gpr
## Custom background: LocalFeature
## Read MG_Grade_II_CJ_3577_2000153910.gpr
## Custom background: LocalFeature
## Read MG_Grade_II_CK_7710_2000153907.gpr
## Custom background: LocalFeature
## Read MG_Grade_I_CF_4450_2000153801.gpr
## Custom background: LocalFeature
## Read MG_Grade_I_CH_24953_2000153787.gpr
## Custom background: LocalFeature
## Read MG_Grade_I_CJ_14742_2000153789.gpr
## Custom background: LocalFeature
## Read MG_Grade_I_CJ_20619_2000153788.gpr
## Custom background: LocalFeature
## Read MG_Grade_I_CJ_26538_2000153915.gpr
## Custom background: LocalFeature
## Read MG_Grade_I_CJ_29452_2000153786.gpr
## Custom background: LocalFeature
## Read MG_Grade_I_CJ_29583_2000153908.gpr
## Custom background: LocalFeature
## Read MG_Grade_I_CJ_29822_2000153909.gpr
## Custom background: LocalFeature
## Read MG_Grade_I_CJ_4231_2000153790.gpr
## Custom background: LocalFeature
## Read MG_Grade_I_CJ_9179_2000153906.gpr
# Label every spot with its ID and sort the whole object by that label.
rownames(RG) <- RG$genes$ID
idOrder <- order(rownames(RG))
RG <- RG[idOrder, ]

# Spot-type table for controlStatus(): every spot is a "Gene" unless its
# Name matches limmaNegativeControlsRegexp, in which case it is "Negative".
# Later rows override earlier ones, so the catch-all row comes first.
spotTypeNames <- c("Gene", "Negative")
idPatterns <- c("*", "*")
namePatterns <- c("*", limmaNegativeControlsRegexp)
plotColours <- c("blue", "red")
types <- data.frame(
  SpotType = spotTypeNames,
  ID = idPatterns,
  Name = namePatterns,
  col = plotColours
)
status <- controlStatus(types, RG$genes)
## Matching patterns for: ID Name
## Found 38400 Gene
## Found 192 Negative
## Setting attributes: values col
# Row indices of spots whose ID or Name matches the control-spot pattern.
filterByID <- grep(limmaControlsRegexp, RG$genes[, "ID"])
filterByName <- grep(limmaControlsRegexp, RG$genes[, "Name"])
filterByNameAndID <- union(filterByID, filterByName)

# Background-correct with limma's nec(), using the spots flagged "Negative"
# in `status` as negative controls and "Gene" as regular probes, then
# normalise between arrays with the configured method.
RG.bc.nec <- nec(
  RG,
  status = status,
  negctrl = "Negative",
  regular = "Gene",
  robust = FALSE,
  offset = limmaBackgroundOffset
)
RG.bc.nba <- normalizeBetweenArrays(RG.bc.nec, method = limmaNormalizationMethod)

# Drop the control spots. A positive "keep" index is used on purpose:
# x[-integer(0), ] selects ZERO rows, so negative indexing would silently
# discard every spot whenever the control pattern matches nothing.
keepRows <- setdiff(seq_len(nrow(RG.bc.nba)), filterByNameAndID)
RG.final <- RG.bc.nba[keepRows, ]
# Raw intensities of the spots whose ID matches one of the hard-coded
# negative-control labels (used for the QC plots).
negativeIdPattern <- "empty|Empty|CONTROL|BSA|Hela cell lysate|p300-BHC"
negatives <- grep(negativeIdPattern, RG$genes[, "ID"])
RG.negatives <- RG[negatives, ]

# Estimate the correlation between within-array duplicate spots. Duplicates
# are taken to be adjacent pairs of rows (ndups = 2, spacing = 1).
corfit <- duplicateCorrelation(
  RG.final,
  design,
  ndups = 2,
  spacing = 1
)
## Loading required package: statmod
# QC plots of log2 foreground intensities. The plotting helpers are defined
# elsewhere in the project; only the titles are collected here.
qcPlotTitles <- list(
  rawAll = "Variation of log2 foreground intensities across all spots of all array types[Raw values]",
  rawNegatives = "Variation of log2 foreground intensities across negative control spots over all array types[Raw values]",
  normalised = "Variation of log2 foreground intensities across all spots post background correction and quantile normalisation",
  controls = "log2 foreground intensities of all control spots"
)

plotForegroundIntensitiesForCohort(RG, qcPlotTitles$rawAll)
## Saving 7 x 5 in image

plotForegroundIntensitiesForCohort(RG.negatives, qcPlotTitles$rawNegatives)
## Saving 7 x 5 in image

plotForegroundIntensitiesForCohort(RG.bc.nba, qcPlotTitles$normalised, FALSE)
## Saving 7 x 5 in image

# NOTE(review): the title mentions control spots but the full RG object is
# passed; confirm whether the helper selects the controls itself.
plotForegroundIntensities(RG, qcPlotTitles$controls)
## Using as id variables
## Warning: Removed 127 rows containing non-finite values (stat_boxplot).
## Saving 7 x 5 in image
## Warning: Removed 127 rows containing non-finite values (stat_boxplot).

# Fit the linear model on the filtered, normalised data. Duplicate spots are
# handled with the same layout (ndups = 2, spacing = 1) that was used when
# the duplicate correlation was estimated. The list element is spelled out
# in full rather than relying on `$` partial matching ("consensus").
fit <- lmFit(
  RG.final,
  design,
  ndups = 2,
  spacing = 1,
  correlation = corfit$consensus.correlation
)

# Apply the contrasts and moderate the statistics.
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)

# writeTopTable() is a project helper defined elsewhere; its result is
# written out as the limma shortlist.
limmatopTableShortlist <- writeTopTable(fit2)

# Spot annotation side by side with the preprocessed expression matrix.
rawPreprocessedValues <- cbind(RG.final$genes, RG.final$E)

# outputDirectory is concatenated as-is with the file name.
write.csv(limmatopTableShortlist,
          paste0(outputDirectory, "limmatopTableShortlist.csv"),
          row.names = FALSE)
write.csv(rawPreprocessedValues,
          paste0(outputDirectory, "rawPreprocessedValues.csv"),
          row.names = FALSE)
# Paths of the intermediate CSV files used in this step.
rawValuesCsv <- paste0(outputDirectory, "rawPreprocessedValues.csv")
sampleValuesCsv <- paste0(outputDirectory, "preprocessedValuesSample.csv")
topTableCsv <- paste0(outputDirectory, "limmatopTableShortlist.csv")
expressionCsv <- paste0(outputDirectory, "limmaOnlyExpressionValues.csv")

# Re-read the preprocessed values from disk and keep the annotation columns
# plus the sample columns matched by limmatwoSampleRegexp.
rawPreprocessedValues <- read.csv(rawValuesCsv)
columnExtract <- paste0("ID|Row|Column|Name|Block", "|", limmatwoSampleRegexp)
isWantedColumn <- grepl(columnExtract, names(rawPreprocessedValues))
preprocessedValuesSample <- rawPreprocessedValues[, isWantedColumn]
write.csv(preprocessedValuesSample, sampleValuesCsv, row.names = FALSE)

# Reload both tables from disk and keep the first top-table row per ID.
limmatopTableShortlist <- read.csv(topTableCsv)
preprocessedValuesSample <- read.csv(sampleValuesCsv)
isFirstTopTableRow <- !duplicated(limmatopTableShortlist$ID)
limmatopTableShortlist <- limmatopTableShortlist[isFirstTopTableRow, ]

# Attach the expression values to the top-table statistics, matching on the
# spot coordinates and identifiers, then again keep one row per ID.
spotKeys <- c("Block", "Row", "Column", "ID", "Name")
limmaOnlyExpressionValues <- merge(
  preprocessedValuesSample,
  limmatopTableShortlist,
  by = spotKeys
)
isFirstMergedRow <- !duplicated(limmaOnlyExpressionValues$ID)
limmaOnlyExpressionValues <- limmaOnlyExpressionValues[isFirstMergedRow, ]

# Round-trip the merged table through disk.
write.csv(limmaOnlyExpressionValues, expressionCsv, row.names = FALSE)
limmaOnlyExpressionValues <- read.csv(expressionCsv)
# Keep rows that pass the adjusted p-value and B-statistic cutoffs and whose
# log fold change lies outside [-limmalogFCcutoff, limmalogFCcutoff].
# which() drops rows where a comparison is NA, as subset() does.
passesSignificance <-
  limmaOnlyExpressionValues$adj.P.Val < limmaAdjPValueCutoff &
  limmaOnlyExpressionValues$B > limmaBValueCutoff
passesFoldChange <-
  limmaOnlyExpressionValues$logFC > limmalogFCcutoff |
  limmaOnlyExpressionValues$logFC < -limmalogFCcutoff
shortlistRows <- which(passesSignificance & passesFoldChange)
limmaOnlyExpressionValues <- limmaOnlyExpressionValues[shortlistRows, , drop = FALSE]

# Full table (annotation + statistics + expression values) of the shortlist.
write.csv(limmaOnlyExpressionValues,
          paste0(outputDirectory, "limma_shortlisted_expandedinfo.csv"),
          row.names = FALSE)

# Strip the annotation and statistics columns, leaving ID and everything
# else that was merged in.
columnsToDrop <- c("Name", "Block", "Row", "Column", "logFC",
                   "AveExpr", "t", "P.Value", "adj.P.Val", "B")
columnsToKeep <- setdiff(names(limmaOnlyExpressionValues), columnsToDrop)
limmaOnlyExpressionValues <- limmaOnlyExpressionValues[columnsToKeep]

# Write the reduced table, read it back, and index its rows by ID.
shortlistCsv <- paste0(outputDirectory, "limma_shortlisted.csv")
write.csv(limmaOnlyExpressionValues, shortlistCsv, row.names = FALSE)
limmaOnlyExpressionValues <- read.csv(shortlistCsv)
rownames(limmaOnlyExpressionValues) <- limmaOnlyExpressionValues$ID
# Paths handed to the external Python scripts.
output <- paste0(outputDirectory, "limma_shortlisted.csv")
svminput <- paste0(outputDirectory, "svm_input.csv")

# Interpreter used for every external step (kept as the original hard-coded
# location; change it here if Python lives elsewhere).
pythonBin <- "/home/saket/anaconda/bin/python"

# Quote a file path for the shell. "~" is expanded first because the shell
# does not expand it inside quotes.
quoteShellPath <- function(path) {
  shQuote(path.expand(path))
}

# Run a shell command and stop if it reports a non-zero exit status, so a
# failed step is not followed by reading a missing or stale result file.
runExternalStep <- function(command) {
  exitStatus <- system(command)
  if (!identical(as.integer(exitStatus), 0L)) {
    stop("External command failed (exit status ", exitStatus, "): ", command,
         call. = FALSE)
  }
  invisible(exitStatus)
}

# Step 1: transpose.py on the shortlist; its result is read back from
# "<output>_transpose.csv".
cmd <- paste(shQuote(pythonBin), "transpose.py", quoteShellPath(output), sep = " ")
runExternalStep(cmd)
fn <- read.csv(paste0(output, "_transpose.csv"))

# Derive a class label from the ID column: text matching limmaSampleRegexp1
# becomes "Control", text matching limmaSampleRegexp2 becomes "Disease".
fn$Labels <- fn$ID
fn$Labels <- gsub(limmaSampleRegexp1, "Control", fn$Labels)
fn$Labels <- gsub(limmaSampleRegexp2, "Disease", fn$Labels)
fn$ID <- NULL
write.csv(fn, svminput, row.names = FALSE)

# Step 2: orange_results.py on the SVM input alone.
# NOTE(review): this run presumably produces the feature-ranking JSON that
# the next run consumes - confirm against orange_results.py.
cmd1 <- paste(shQuote(pythonBin), "orange_results.py", quoteShellPath(svminput), sep = " ")
runExternalStep(cmd1)

# Step 3: orange_results.py again, now with the ranking JSON as a second
# argument; its standard output is redirected into svmOutput.
svmJson <- paste0(substr(svminput, 1, nchar(svminput) - 4), "_average_feature_rankings.json")
svmOutput <- paste0(svminput, "_output.csv")
cmd2 <- paste(shQuote(pythonBin), "orange_results.py",
              quoteShellPath(svminput), quoteShellPath(svmJson),
              ">", quoteShellPath(svmOutput), sep = " ")
runExternalStep(cmd2)
# Load the classifier results and keep columns 1, 2 and 12 (the code below
# uses the Brier and FeatureList columns of that selection).
svmResults <- read.csv(svmOutput)
output <- svmResults[, c(1, 2, 12)]

# Take the space-separated feature list of the row with the lowest Brier
# value and pull those IDs out of the shortlisted expression table.
bestRow <- which.min(output$Brier)
bestFeatureList <- as.character(output$FeatureList[bestRow])
selectedGenes <- strsplit(bestFeatureList, split = " ")
shortlisted <- limmaOnlyExpressionValues[selectedGenes[[1]], ]

# Long format: one row per (ID, sample column) pair.
melted <- melt(shortlisted, varnames=c("symbol", "sample"))
## Using ID as id variables

# Group label per observation: the sample-column name with the two sample
# patterns replaced by their display names.
melted$factor <- gsub(
  limmaSampleRegexp2, limmaSample2,
  gsub(limmaSampleRegexp1, limmaSample1, melted$variable)
)

# One panel per selected ID, points coloured by group.
gg <- ggplot(melted, aes(x = factor, y = (value))) +
  geom_point(aes(color = factor(factor))) +
  facet_wrap(~ID)
print(gg)

knitr::kable(output)
| 1 |
0.0149641 |
NM_001014444.1 |
| 2 |
0.0161294 |
NM_001014444.1 BC025985.1 |
| 3 |
0.0182341 |
NM_001014444.1 BC025985.1 BC006453.1 |
| 4 |
0.0153116 |
NM_001014444.1 BC025985.1 BC006453.1 NM_001025266.1 |
| 5 |
0.0178484 |
NM_001014444.1 BC025985.1 BC006453.1 NM_001025266.1 NM_080820.3 |
| 6 |
0.0190652 |
NM_001014444.1 BC025985.1 BC006453.1 NM_001025266.1 NM_080820.3 NM_171830.1 |
| 7 |
0.0389505 |
NM_001014444.1 BC025985.1 BC006453.1 NM_001025266.1 NM_080820.3 NM_171830.1 BC065370.1 |
| 8 |
0.0149086 |
NM_001014444.1 BC025985.1 BC006453.1 NM_001025266.1 NM_080820.3 NM_171830.1 BC065370.1 NM_002767.2 |
| 9 |
0.0166177 |
NM_001014444.1 BC025985.1 BC006453.1 NM_001025266.1 NM_080820.3 NM_171830.1 BC065370.1 NM_002767.2 BC033854.1 |
| 10 |
0.0204123 |
NM_001014444.1 BC025985.1 BC006453.1 NM_001025266.1 NM_080820.3 NM_171830.1 BC065370.1 NM_002767.2 BC033854.1 BC080607.1 |
| 11 |
0.0197037 |
NM_001014444.1 BC025985.1 BC006453.1 NM_001025266.1 NM_080820.3 NM_171830.1 BC065370.1 NM_002767.2 BC033854.1 BC080607.1 NM_014372.3 |
| 12 |
0.0193403 |
NM_001014444.1 BC025985.1 BC006453.1 NM_001025266.1 NM_080820.3 NM_171830.1 BC065370.1 NM_002767.2 BC033854.1 BC080607.1 NM_014372.3 NM_007280.1 |
| 13 |
0.0176201 |
NM_001014444.1 BC025985.1 BC006453.1 NM_001025266.1 NM_080820.3 NM_171830.1 BC065370.1 NM_002767.2 BC033854.1 BC080607.1 NM_014372.3 NM_007280.1 NM_004264.2 |
| 14 |
0.0183085 |
NM_001014444.1 BC025985.1 BC006453.1 NM_001025266.1 NM_080820.3 NM_171830.1 BC065370.1 NM_002767.2 BC033854.1 BC080607.1 NM_014372.3 NM_007280.1 NM_004264.2 BC015738.1 |
| 15 |
0.0266272 |
NM_001014444.1 BC025985.1 BC006453.1 NM_001025266.1 NM_080820.3 NM_171830.1 BC065370.1 NM_002767.2 BC033854.1 BC080607.1 NM_014372.3 NM_007280.1 NM_004264.2 BC015738.1 XM_290842.4 |
| 16 |
0.0292823 |
NM_001014444.1 BC025985.1 BC006453.1 NM_001025266.1 NM_080820.3 NM_171830.1 BC065370.1 NM_002767.2 BC033854.1 BC080607.1 NM_014372.3 NM_007280.1 NM_004264.2 BC015738.1 XM_290842.4 BC037876.1 |
| 17 |
0.0177093 |
NM_001014444.1 BC025985.1 BC006453.1 NM_001025266.1 NM_080820.3 NM_171830.1 BC065370.1 NM_002767.2 BC033854.1 BC080607.1 NM_014372.3 NM_007280.1 NM_004264.2 BC015738.1 XM_290842.4 BC037876.1 BC062437.1 |
| 18 |
0.0148453 |
NM_001014444.1 BC025985.1 BC006453.1 NM_001025266.1 NM_080820.3 NM_171830.1 BC065370.1 NM_002767.2 BC033854.1 BC080607.1 NM_014372.3 NM_007280.1 NM_004264.2 BC015738.1 XM_290842.4 BC037876.1 BC062437.1 BC073856.1 |
| 19 |
0.0195340 |
NM_001014444.1 BC025985.1 BC006453.1 NM_001025266.1 NM_080820.3 NM_171830.1 BC065370.1 NM_002767.2 BC033854.1 BC080607.1 NM_014372.3 NM_007280.1 NM_004264.2 BC015738.1 XM_290842.4 BC037876.1 BC062437.1 BC073856.1 NM_198086.1 |
| 20 |
0.0186476 |
NM_001014444.1 BC025985.1 BC006453.1 NM_001025266.1 NM_080820.3 NM_171830.1 BC065370.1 NM_002767.2 BC033854.1 BC080607.1 NM_014372.3 NM_007280.1 NM_004264.2 BC015738.1 XM_290842.4 BC037876.1 BC062437.1 BC073856.1 NM_198086.1 BC004130.2 |
| 21 |
0.0228714 |
NM_001014444.1 BC025985.1 BC006453.1 NM_001025266.1 NM_080820.3 NM_171830.1 BC065370.1 NM_002767.2 BC033854.1 BC080607.1 NM_014372.3 NM_007280.1 NM_004264.2 BC015738.1 XM_290842.4 BC037876.1 BC062437.1 BC073856.1 NM_198086.1 BC004130.2 NM_001039656.1 |
| 22 |
0.0143545 |
NM_001014444.1 BC025985.1 BC006453.1 NM_001025266.1 NM_080820.3 NM_171830.1 BC065370.1 NM_002767.2 BC033854.1 BC080607.1 NM_014372.3 NM_007280.1 NM_004264.2 BC015738.1 XM_290842.4 BC037876.1 BC062437.1 BC073856.1 NM_198086.1 BC004130.2 NM_001039656.1 NM_001042476.1 |
| 23 |
0.0169650 |
NM_001014444.1 BC025985.1 BC006453.1 NM_001025266.1 NM_080820.3 NM_171830.1 BC065370.1 NM_002767.2 BC033854.1 BC080607.1 NM_014372.3 NM_007280.1 NM_004264.2 BC015738.1 XM_290842.4 BC037876.1 BC062437.1 BC073856.1 NM_198086.1 BC004130.2 NM_001039656.1 NM_001042476.1 BC008254.1 |
| 24 |
0.0157221 |
NM_001014444.1 BC025985.1 BC006453.1 NM_001025266.1 NM_080820.3 NM_171830.1 BC065370.1 NM_002767.2 BC033854.1 BC080607.1 NM_014372.3 NM_007280.1 NM_004264.2 BC015738.1 XM_290842.4 BC037876.1 BC062437.1 BC073856.1 NM_198086.1 BC004130.2 NM_001039656.1 NM_001042476.1 BC008254.1 NM_002257.2 |
| 25 |
0.0195688 |
NM_001014444.1 BC025985.1 BC006453.1 NM_001025266.1 NM_080820.3 NM_171830.1 BC065370.1 NM_002767.2 BC033854.1 BC080607.1 NM_014372.3 NM_007280.1 NM_004264.2 BC015738.1 XM_290842.4 BC037876.1 BC062437.1 BC073856.1 NM_198086.1 BC004130.2 NM_001039656.1 NM_001042476.1 BC008254.1 NM_002257.2 NM_016246.2 |
| 26 |
0.0162746 |
NM_001014444.1 BC025985.1 BC006453.1 NM_001025266.1 NM_080820.3 NM_171830.1 BC065370.1 NM_002767.2 BC033854.1 BC080607.1 NM_014372.3 NM_007280.1 NM_004264.2 BC015738.1 XM_290842.4 BC037876.1 BC062437.1 BC073856.1 NM_198086.1 BC004130.2 NM_001039656.1 NM_001042476.1 BC008254.1 NM_002257.2 NM_016246.2 BC006011.1 |
| 27 |
0.0099496 |
NM_001014444.1 BC025985.1 BC006453.1 NM_001025266.1 NM_080820.3 NM_171830.1 BC065370.1 NM_002767.2 BC033854.1 BC080607.1 NM_014372.3 NM_007280.1 NM_004264.2 BC015738.1 XM_290842.4 BC037876.1 BC062437.1 BC073856.1 NM_198086.1 BC004130.2 NM_001039656.1 NM_001042476.1 BC008254.1 NM_002257.2 NM_016246.2 BC006011.1 NM_148910.2 |
| 28 |
0.0172781 |
NM_001014444.1 BC025985.1 BC006453.1 NM_001025266.1 NM_080820.3 NM_171830.1 BC065370.1 NM_002767.2 BC033854.1 BC080607.1 NM_014372.3 NM_007280.1 NM_004264.2 BC015738.1 XM_290842.4 BC037876.1 BC062437.1 BC073856.1 NM_198086.1 BC004130.2 NM_001039656.1 NM_001042476.1 BC008254.1 NM_002257.2 NM_016246.2 BC006011.1 NM_148910.2 BC062688.1 |
| 29 |
0.0236098 |
NM_001014444.1 BC025985.1 BC006453.1 NM_001025266.1 NM_080820.3 NM_171830.1 BC065370.1 NM_002767.2 BC033854.1 BC080607.1 NM_014372.3 NM_007280.1 NM_004264.2 BC015738.1 XM_290842.4 BC037876.1 BC062437.1 BC073856.1 NM_198086.1 BC004130.2 NM_001039656.1 NM_001042476.1 BC008254.1 NM_002257.2 NM_016246.2 BC006011.1 NM_148910.2 BC062688.1 BC020637.1 |
| 30 |
0.0120214 |
NM_001014444.1 BC025985.1 BC006453.1 NM_001025266.1 NM_080820.3 NM_171830.1 BC065370.1 NM_002767.2 BC033854.1 BC080607.1 NM_014372.3 NM_007280.1 NM_004264.2 BC015738.1 XM_290842.4 BC037876.1 BC062437.1 BC073856.1 NM_198086.1 BC004130.2 NM_001039656.1 NM_001042476.1 BC008254.1 NM_002257.2 NM_016246.2 BC006011.1 NM_148910.2 BC062688.1 BC020637.1 Nol3 |