library(reshape2)
library(ggplot2)
library(limma)
library(knitr)
# Build the design matrix and contrast matrix for the Condition x Sex layout.
# `targetFile` is defined outside this chunk; the targets table must provide
# the Condition, Sex and FileName columns used in this script.
targets <- read.csv(targetFile, header = TRUE)

# One group label per array, e.g. "Control.Male".
groupLabels <- paste(targets$Condition, targets$Sex, sep = ".")
f <- factor(groupLabels,
            levels = c("Control.Male", "Control.Female",
                       "MGGrade1.Male", "MGGrade1.Female",
                       "MGGrade2.Male", "MGGrade2.Female"))

# A label outside the levels above becomes NA, and model.matrix() would then
# silently drop that array, leaving the design with fewer rows than arrays.
# Fail early with the offending labels instead.
if (anyNA(f)) {
  stop("Unexpected Condition.Sex combination(s) in targets: ",
       paste(unique(groupLabels[is.na(f)]), collapse = ", "),
       call. = FALSE)
}

# Cell-means parameterisation: one coefficient per Condition.Sex group.
design <- model.matrix(~0 + f)
colnames(design) <- levels(f)

# NOTE(review): as written, each contrast is the Female-Male difference in a
# disease group (or the mean of both grades) minus the Female-Male difference
# in controls. Confirm this is the intended "sex corrected" comparison.
cont.matrix <- makeContrasts(
  HCvsMG1_SexCorrected = (MGGrade1.Female - MGGrade1.Male) - (Control.Female - Control.Male),
  HCvsMG2_SexCorrected = (MGGrade2.Female - MGGrade2.Male) - (Control.Female - Control.Male),
  HCvsMG_SexCorrected = ((MGGrade1.Female - MGGrade1.Male) + (MGGrade2.Female - MGGrade2.Male)) / 2 - (Control.Female - Control.Male),
  levels = design
)
# Load every array listed in the targets table as single-channel data.
# The Cy5 foreground/background columns (named by limmaCy5 / limmaCy5b, which
# are configured outside this chunk) are mapped onto limma's G / Gb slots.
channelColumns <- list(G = limmaCy5, Gb = limmaCy5b)
RG <- read.maimages(
  files = targets$FileName,
  source = "genepix.custom",
  columns = channelColumns,
  green.only = TRUE
)
## Custom background: LocalFeature
## Read Control_H-02_2000153953.gpr
## Custom background: LocalFeature
## Read Control_H-03_2000153943.gpr
## Custom background: LocalFeature
## Read Control_H-19_2000154008.gpr
## Custom background: LocalFeature
## Read Control_H-23_2000154009.gpr
## Custom background: LocalFeature
## Read Control_H-25_2000153952.gpr
## Custom background: LocalFeature
## Read Control_H-35_2000153942.gpr
## Custom background: LocalFeature
## Read Control_H-41_2000154026.gpr
## Custom background: LocalFeature
## Read Control_HV-56_2000155735.gpr
## Custom background: LocalFeature
## Read Control_HV-59_2000155740.gpr
## Custom background: LocalFeature
## Read Control_HV-64_2000144456.gpr
## Custom background: LocalFeature
## Read MG_Grade_I_CF_4450_2000153801.gpr
## Custom background: LocalFeature
## Read MG_Grade_I_CH_24953_2000153787.gpr
## Custom background: LocalFeature
## Read MG_Grade_I_CJ_4231_2000153790.gpr
## Custom background: LocalFeature
## Read MG_Grade_I_CJ_9179_2000153906.gpr
## Custom background: LocalFeature
## Read MG_Grade_I_CJ_14742_2000153789.gpr
## Custom background: LocalFeature
## Read MG_Grade_I_CJ_20619_2000153788.gpr
## Custom background: LocalFeature
## Read MG_Grade_I_CJ_26538_2000153915.gpr
## Custom background: LocalFeature
## Read MG_Grade_I_CJ_29452_2000153786.gpr
## Custom background: LocalFeature
## Read MG_Grade_I_CJ_29583_2000153908.gpr
## Custom background: LocalFeature
## Read MG_Grade_I_CJ_29822_2000153909.gpr
## Custom background: LocalFeature
## Read MG_Grade_II_CH_17967_2000153914.gpr
## Custom background: LocalFeature
## Read MG_Grade_II_CJ_3577_2000153910.gpr
## Custom background: LocalFeature
## Read MG_Grade_II_CJ_15491_2000153803.gpr
## Custom background: LocalFeature
## Read MG_Grade_II_CJ_15753_2000153802.gpr
# Key every spot by its probe ID, then sort the whole object by that key.
rownames(RG) <- RG$genes$ID
idOrder <- order(rownames(RG))
RG <- RG[idOrder, ]

# Spot-type table for controlStatus(): every spot is a "Gene" unless its Name
# matches limmaNegativeControlsRegexp, in which case it is "Negative".
types <- data.frame(
  SpotType = c("Gene", "Negative"),
  ID = rep("*", 2),
  Name = c("*", limmaNegativeControlsRegexp),
  col = c("blue", "red")
)
status <- controlStatus(types, RG$genes)
## Matching patterns for: ID Name
## Found 38400 Gene
## Found 192 Negative
## Setting attributes: values col
# Row indices of control spots, matched on either the ID or the Name column.
filterByID <- grep(limmaControlsRegexp, RG$genes[, "ID"])
filterByName <- grep(limmaControlsRegexp, RG$genes[, "Name"])
filterByNameAndID <- union(filterByID, filterByName)

# Background-correct with nec(), using the spots flagged "Negative" in
# `status` as negative controls and the "Gene" spots as regular probes.
RG.bc.nec <- nec(RG,
                 status = status,
                 negctrl = "Negative",
                 regular = "Gene",
                 robust = FALSE,
                 offset = limmaBackgroundOffset)
RG.bc.nba <- normalizeBetweenArrays(RG.bc.nec, method = limmaNormalizationMethod)

# Drop the control spots. The rows to keep are listed explicitly because
# x[-integer(0), ] selects ZERO rows: negative indexing would discard every
# probe whenever limmaControlsRegexp matches nothing.
keepRows <- setdiff(seq_len(nrow(RG.bc.nba)), filterByNameAndID)
RG.final <- RG.bc.nba[keepRows, ]

# Raw (uncorrected) intensities of the negative-control spots, kept for QC.
negatives <- grep("empty|Empty|CONTROL|BSA|Hela cell lysate|p300-BHC", RG$genes[, "ID"])
RG.negatives <- RG[negatives, ]

# Correlation between within-array duplicate spots.
# assumes each probe is spotted twice and the two copies are in adjacent rows
# (ndups = 2, spacing = 1) - TODO confirm this still holds after filtering.
corfit <- duplicateCorrelation(RG.final,
                               design,
                               ndups = 2,
                               spacing = 1)
## Loading required package: statmod
# QC plots. plotForegroundIntensitiesForCohort() and
# plotForegroundIntensities() are defined outside this chunk; each call passes
# an intensity object followed by the plot title. The "##" lines are console
# output captured when the document was rendered.

# Raw intensities, all spots.
plotForegroundIntensitiesForCohort(RG, "Variation of log2 foreground intensities across all spots of all array types[Raw values]")
## Saving 7 x 5 in image

# Raw intensities, negative-control spots only.
plotForegroundIntensitiesForCohort(RG.negatives, "Variation of log2 foreground intensities across negative control spots over all array types[Raw values]")
## Saving 7 x 5 in image

# Background-corrected and between-array-normalised intensities.
# NOTE(review): the meaning of the third positional argument (FALSE) is not
# visible here - check the helper's definition.
plotForegroundIntensitiesForCohort(RG.bc.nba, "Variation of log2 foreground intensities across all spots post background correction and quantile normalisation", FALSE)
## Saving 7 x 5 in image

# NOTE(review): the title says "control spots" but the full RG object is
# passed, not RG.negatives - confirm whether the helper filters internally.
plotForegroundIntensities(RG, "log2 foreground intensities of all control spots")
## Using as id variables
## Warning: Removed 127 rows containing non-finite values (stat_boxplot).
## Saving 7 x 5 in image
## Warning: Removed 127 rows containing non-finite values (stat_boxplot).

# Fit the per-probe linear model, treating the two within-array duplicate
# spots as correlated replicates. The element is spelled out in full:
# `corfit$consensus` only worked through `$` partial matching of
# `consensus.correlation`.
fit <- lmFit(RG.final,
             design,
             ndups = 2,
             correlation = corfit$consensus.correlation)

# Re-express the fit in terms of the contrasts and moderate the statistics.
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)

# writeTopTable() is defined outside this chunk; its result is exported below.
limmatopTableShortlist <- writeTopTable(fit2)

# Probe annotation side by side with the normalised expression matrix.
rawPreprocessedValues <- cbind(RG.final$genes, RG.final$E)

# outputDirectory is concatenated as-is, so it must already end with a path
# separator.
write.csv(limmatopTableShortlist,
          paste0(outputDirectory, "limmatopTableShortlist.csv"),
          row.names = FALSE)
write.csv(rawPreprocessedValues,
          paste0(outputDirectory, "rawPreprocessedValues.csv"),
          row.names = FALSE)
# Build the shortlist of probes that pass the limma cutoffs, writing each
# intermediate table to outputDirectory and reading it back from disk.

# Path of a file inside the output directory (outputDirectory is used as a
# plain prefix).
outputFile <- function(fileName) paste0(outputDirectory, fileName)

# Keep the annotation columns plus the sample columns that match
# limmatwoSampleRegexp.
rawPreprocessedValues <- read.csv(outputFile("rawPreprocessedValues.csv"))
columnExtract <- paste("ID|Row|Column|Name|Block", limmatwoSampleRegexp, sep = "|")
wantedColumns <- grep(columnExtract, names(rawPreprocessedValues))
preprocessedValuesSample <- rawPreprocessedValues[, wantedColumns]
write.csv(preprocessedValuesSample, outputFile("preprocessedValuesSample.csv"), row.names = FALSE)

# Reload both tables and keep the first top-table row for every probe ID.
limmatopTableShortlist <- read.csv(outputFile("limmatopTableShortlist.csv"))
preprocessedValuesSample <- read.csv(outputFile("preprocessedValuesSample.csv"))
firstTopTableRow <- !duplicated(limmatopTableShortlist$ID)
limmatopTableShortlist <- limmatopTableShortlist[firstTopTableRow, ]

# Attach the statistics to the expression values, one row per probe ID.
limmaOnlyExpressionValues <- merge(
  preprocessedValuesSample,
  limmatopTableShortlist,
  by = c("Block", "Row", "Column", "ID", "Name")
)
firstMergedRow <- !duplicated(limmaOnlyExpressionValues$ID)
limmaOnlyExpressionValues <- limmaOnlyExpressionValues[firstMergedRow, ]
write.csv(limmaOnlyExpressionValues, outputFile("limmaOnlyExpressionValues.csv"), row.names = FALSE)
limmaOnlyExpressionValues <- read.csv(outputFile("limmaOnlyExpressionValues.csv"))

# Apply the adjusted p-value, B-statistic and two-sided log fold-change cutoffs.
limmaOnlyExpressionValues <- subset(
  limmaOnlyExpressionValues,
  (limmaOnlyExpressionValues$adj.P.Val < limmaAdjPValueCutoff &
     limmaOnlyExpressionValues$B > limmaBValueCutoff) &
    (limmaOnlyExpressionValues$logFC > limmalogFCcutoff |
       limmaOnlyExpressionValues$logFC < -limmalogFCcutoff)
)
write.csv(limmaOnlyExpressionValues, outputFile("limma_shortlisted_expandedinfo.csv"), row.names = FALSE)

# Strip the annotation and statistics columns, leaving ID plus sample values.
droppedColumns <- c("Name", "Block", "Row", "Column", "logFC",
                    "AveExpr", "t", "P.Value", "adj.P.Val", "B")
retainedColumns <- setdiff(names(limmaOnlyExpressionValues), droppedColumns)
limmaOnlyExpressionValues <- limmaOnlyExpressionValues[retainedColumns]
write.csv(limmaOnlyExpressionValues, outputFile("limma_shortlisted.csv"), row.names = FALSE)
# Hand the shortlisted expression values to the external Python scripts
# (transpose.py, orange_results.py), which are looked up in the working
# directory.
output <- paste0(outputDirectory, "limma_shortlisted.csv")
svminput <- paste0(outputDirectory, "svm_input.csv")

limmaOnlyExpressionValues <- read.csv(output)
rownames(limmaOnlyExpressionValues) <- limmaOnlyExpressionValues$ID

# Interpreter used for the helper scripts. Define `pythonBin` before this
# chunk to override the historical hard-coded location.
if (!exists("pythonBin")) {
  pythonBin <- "/home/saket/anaconda/bin/python"
}

# Run a Python script with shell-quoted arguments and stop on a non-zero exit
# status, so a failed step cannot be masked by stale output files.
# `stdout = ""` leaves output on the console; a file name redirects it there.
runPythonScript <- function(args, stdout = "") {
  status <- system2(pythonBin, args = shQuote(args), stdout = stdout)
  if (!identical(as.integer(status), 0L)) {
    stop("Python command failed (exit status ", status, "): ",
         paste(args, collapse = " "), call. = FALSE)
  }
  invisible(status)
}

# transpose.py output is read back from "<input>_transpose.csv".
runPythonScript(c("transpose.py", output))
fn <- read.csv(paste0(output, "_transpose.csv"))

# Derive class labels from the ID column of the transposed table, then drop
# that column.
fn$Labels <- fn$ID
fn$Labels <- gsub(limmaSampleRegexp1, "Control", fn$Labels)
fn$Labels <- gsub(limmaSampleRegexp2, "Disease", fn$Labels)
fn$ID <- NULL
write.csv(fn, svminput, row.names = FALSE)

# First pass over the SVM input.
# NOTE(review): presumably this pass writes the *_average_feature_rankings.json
# file consumed by the second pass - confirm in orange_results.py.
runPythonScript(c("orange_results.py", svminput))

# Second pass: supply the rankings JSON and capture stdout as a CSV.
svmJson <- paste0(sub("\\.csv$", "", svminput), "_average_feature_rankings.json")
svmOutput <- paste0(svminput, "_output.csv")
runPythonScript(c("orange_results.py", svminput, svmJson), stdout = svmOutput)
# Read the SVM results and plot the expression values of one feature set.
output <- read.csv(svmOutput)
# Columns are picked by position; the code below requires that one of them is
# named FeatureList.
output <- output[, c(1, 2, 12)]
#kable(output)

# Row of `output` whose space-separated FeatureList is plotted.
featureSetRow <- 30
if (nrow(output) < featureSetRow) {
  stop("SVM output has only ", nrow(output), " row(s); row ", featureSetRow,
       " is required to select the feature set.", call. = FALSE)
}
selectedGenes <- strsplit(as.character(output[featureSetRow, ]$FeatureList), split = " ")

# Indexing by an unknown row name yields an all-NA row, so report and skip IDs
# that are not present in the shortlist.
isKnownGene <- selectedGenes[[1]] %in% rownames(limmaOnlyExpressionValues)
if (!all(isKnownGene)) {
  warning("Selected features missing from the shortlist: ",
          paste(selectedGenes[[1]][!isKnownGene], collapse = ", "),
          call. = FALSE)
}
shortlisted <- limmaOnlyExpressionValues[selectedGenes[[1]][isKnownGene], ]

# Long format: one row per (ID, sample column). `id.vars` is explicit because
# melt() ignores `varnames` for data frames and would otherwise guess.
melted <- melt(shortlisted, id.vars = "ID")

# Map each sample column name onto its group label.
melted$factor <- melted$variable
melted$factor <- gsub(limmaSampleRegexp1, limmaSample1, melted$factor)
melted$factor <- gsub(limmaSampleRegexp2, limmaSample2, melted$factor)

# One panel per probe, points coloured by group.
gg <- ggplot(melted) +
  aes(x = factor, y = value) +
  geom_point(aes(color = factor(factor))) +
  facet_wrap(~ID)
print(gg)

knitr::kable(output)
| 1 |
0.3101087 |
NM_007280.1 |
| 2 |
0.1898314 |
NM_007280.1 NM_001025436.1 |
| 3 |
0.1019387 |
NM_007280.1 NM_001025436.1 NM_032838.2 |
| 4 |
0.1001704 |
NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 |
| 5 |
0.1405390 |
NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 |
| 6 |
0.1136362 |
NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 |
| 7 |
0.1369756 |
NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 |
| 8 |
0.1486755 |
NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 |
| 9 |
0.0856016 |
NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 |
| 10 |
0.0916005 |
NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 |
| 11 |
0.0829444 |
NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 |
| 12 |
0.0768960 |
NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 |
| 13 |
0.0808394 |
NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 |
| 14 |
0.0841105 |
NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 |
| 15 |
0.0679298 |
NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 |
| 16 |
0.0320617 |
NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2 |
| 17 |
0.0279850 |
NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2 NM_001003799.1 |
| 18 |
0.0645226 |
NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2 NM_001003799.1 NM_002771.2 |
| 19 |
0.0827627 |
NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2 NM_001003799.1 NM_002771.2 BC072657 |
| 20 |
0.0905909 |
NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2 NM_001003799.1 NM_002771.2 BC072657 BC008605.1 |
| 21 |
0.0466633 |
NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2 NM_001003799.1 NM_002771.2 BC072657 BC008605.1 BC043488.1 |
| 22 |
0.0487574 |
NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2 NM_001003799.1 NM_002771.2 BC072657 BC008605.1 BC043488.1 NM_003616.2 |
| 23 |
0.0922666 |
NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2 NM_001003799.1 NM_002771.2 BC072657 BC008605.1 BC043488.1 NM_003616.2 NM_000546.2 |
| 24 |
0.0845409 |
NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2 NM_001003799.1 NM_002771.2 BC072657 BC008605.1 BC043488.1 NM_003616.2 NM_000546.2 BC020677.1 |
| 25 |
0.0962817 |
NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2 NM_001003799.1 NM_002771.2 BC072657 BC008605.1 BC043488.1 NM_003616.2 NM_000546.2 BC020677.1 NM_001551.2 |
| 26 |
0.0802237 |
NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2 NM_001003799.1 NM_002771.2 BC072657 BC008605.1 BC043488.1 NM_003616.2 NM_000546.2 BC020677.1 NM_001551.2 BC015848.1 |
| 27 |
0.1545900 |
NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2 NM_001003799.1 NM_002771.2 BC072657 BC008605.1 BC043488.1 NM_003616.2 NM_000546.2 BC020677.1 NM_001551.2 BC015848.1 NM_207521.1 |
| 28 |
0.0580788 |
NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2 NM_001003799.1 NM_002771.2 BC072657 BC008605.1 BC043488.1 NM_003616.2 NM_000546.2 BC020677.1 NM_001551.2 BC015848.1 NM_207521.1 NM_080473.3 |
| 29 |
0.0938396 |
NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2 NM_001003799.1 NM_002771.2 BC072657 BC008605.1 BC043488.1 NM_003616.2 NM_000546.2 BC020677.1 NM_001551.2 BC015848.1 NM_207521.1 NM_080473.3 NM_052958.1 |
| 30 |
0.0566329 |
NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2 NM_001003799.1 NM_002771.2 BC072657 BC008605.1 BC043488.1 NM_003616.2 NM_000546.2 BC020677.1 NM_001551.2 BC015848.1 NM_207521.1 NM_080473.3 NM_052958.1 NM_032472.3 |