HC vs MG1 Sex Corrected 10 Control Samples

library(reshape2)
library(ggplot2)
library(limma)
library(knitr)

## Read the sample sheet; Condition and Sex are combined into one group
## label per array (e.g. "Control.Male").
targets <- read.csv(targetFile, header = TRUE)
f <- paste(targets$Condition, targets$Sex, sep = ".")
fLevels <- c("Control.Male", "Control.Female",
             "MGGrade1.Male", "MGGrade1.Female",
             "MGGrade2.Male", "MGGrade2.Female")

## A label outside fLevels would become NA in the factor, and model.matrix()
## would then silently drop that array from the design. Fail early instead.
if (!all(f %in% fLevels)) {
  stop("Unexpected Condition.Sex group(s) in targets: ",
       paste(setdiff(f, fLevels), collapse = ", "),
       call. = FALSE)
}
f <- factor(f, levels = fLevels)

## Group-means parameterisation: one coefficient per Condition.Sex group.
design <- model.matrix(~0 + f)

colnames(design) <- levels(f)

## NOTE(review): each contrast below is a difference of sex differences
## ((Female - Male) in disease minus (Female - Male) in controls), i.e. a
## sex-by-condition interaction term. If the intent is a disease-vs-control
## effect averaged over sex, the contrast would instead be of the form
## (MGGrade1.Female + MGGrade1.Male)/2 - (Control.Female + Control.Male)/2.
## Left unchanged here -- confirm against the intended hypothesis.
cont.matrix <- makeContrasts(
  HCvsMG1_SexCorrected = (MGGrade1.Female - MGGrade1.Male) - (Control.Female - Control.Male),
  HCvsMG2_SexCorrected = (MGGrade2.Female - MGGrade2.Male) - (Control.Female - Control.Male),
  HCvsMG_SexCorrected = ((MGGrade1.Female - MGGrade1.Male) + (MGGrade2.Female - MGGrade2.Male)) / 2 - (Control.Female - Control.Male),
  levels = design
)

## Load every file listed in targets as single-channel data. The Cy5
## foreground/background columns are placed in limma's G/Gb slots, as
## explained in the beginning.
RG <- read.maimages(
  targets$FileName,
  source = "genepix.custom",
  green.only = TRUE,
  columns = list(G = limmaCy5, Gb = limmaCy5b)
)

## Custom background: LocalFeature 
## Read Control_H-02_2000153953.gpr 
## Custom background: LocalFeature 
## Read Control_H-03_2000153943.gpr 
## Custom background: LocalFeature 
## Read Control_H-19_2000154008.gpr 
## Custom background: LocalFeature 
## Read Control_H-23_2000154009.gpr 
## Custom background: LocalFeature 
## Read Control_H-25_2000153952.gpr 
## Custom background: LocalFeature 
## Read Control_H-35_2000153942.gpr 
## Custom background: LocalFeature 
## Read Control_H-41_2000154026.gpr 
## Custom background: LocalFeature 
## Read Control_HV-56_2000155735.gpr 
## Custom background: LocalFeature 
## Read Control_HV-59_2000155740.gpr 
## Custom background: LocalFeature 
## Read Control_HV-64_2000144456.gpr 
## Custom background: LocalFeature 
## Read MG_Grade_I_CF_4450_2000153801.gpr 
## Custom background: LocalFeature 
## Read MG_Grade_I_CH_24953_2000153787.gpr 
## Custom background: LocalFeature 
## Read MG_Grade_I_CJ_4231_2000153790.gpr 
## Custom background: LocalFeature 
## Read MG_Grade_I_CJ_9179_2000153906.gpr 
## Custom background: LocalFeature 
## Read MG_Grade_I_CJ_14742_2000153789.gpr 
## Custom background: LocalFeature 
## Read MG_Grade_I_CJ_20619_2000153788.gpr 
## Custom background: LocalFeature 
## Read MG_Grade_I_CJ_26538_2000153915.gpr 
## Custom background: LocalFeature 
## Read MG_Grade_I_CJ_29452_2000153786.gpr 
## Custom background: LocalFeature 
## Read MG_Grade_I_CJ_29583_2000153908.gpr 
## Custom background: LocalFeature 
## Read MG_Grade_I_CJ_29822_2000153909.gpr 
## Custom background: LocalFeature 
## Read MG_Grade_II_CH_17967_2000153914.gpr 
## Custom background: LocalFeature 
## Read MG_Grade_II_CJ_3577_2000153910.gpr 
## Custom background: LocalFeature 
## Read MG_Grade_II_CJ_15491_2000153803.gpr 
## Custom background: LocalFeature 
## Read MG_Grade_II_CJ_15753_2000153802.gpr

## Label rows by feature ID, then sort by that label so that features
## sharing an ID sit next to each other.
rownames(RG) <- RG$genes$ID
RG <- RG[order(rownames(RG)), ]

## Spot-type table for controlStatus(): every feature starts as "Gene";
## features whose Name matches limmaNegativeControlsRegexp are "Negative".
types <- data.frame(
  SpotType = c("Gene", "Negative"),
  ID = rep("*", 2),
  Name = c("*", limmaNegativeControlsRegexp),
  col = c("blue", "red")
)
status <- controlStatus(types, RG$genes)

## Matching patterns for: ID Name 
## Found 38400 Gene 
## Found 192 Negative 
## Setting attributes: values col

## Row indices of control features, matched on either the ID or Name column.
filterByID <- grep(limmaControlsRegexp, RG$genes[, "ID"])
filterByName <- grep(limmaControlsRegexp, RG$genes[, "Name"])
filterByNameAndID <- union(filterByID, filterByName)

## Background-correct using the spots flagged "Negative" in `status`.
RG.bc.nec <- nec(RG,
                 status = status,
                 negctrl = "Negative",
                 regular = "Gene",
                 robust = FALSE,
                 offset = limmaBackgroundOffset)

RG.bc.nba <- normalizeBetweenArrays(RG.bc.nec, method = limmaNormalizationMethod)

## Drop control features by selecting the rows to KEEP. Negative indexing
## (`x[-idx, ]`) returns zero rows when idx is empty, which would silently
## discard the whole data set if limmaControlsRegexp matched nothing.
keepRows <- setdiff(seq_len(nrow(RG.bc.nba)), filterByNameAndID)
RG.final <- RG.bc.nba[keepRows, ]

## Raw (uncorrected) intensities of the negative-control features, matched by ID.
negatives <- grep("empty|Empty|CONTROL|BSA|Hela cell lysate|p300-BHC", RG$genes[, "ID"])
RG.negatives <- RG[negatives, ]

## Estimate the correlation between within-array duplicate spots, treated
## as adjacent pairs (ndups = 2, spacing = 1).
corfit <- duplicateCorrelation(RG.final, design, ndups = 2, spacing = 1)

## Loading required package: statmod

## QC plot: raw intensities, all spots.
plotForegroundIntensitiesForCohort(
  RG,
  "Variation of log2 foreground intensities across  all spots of all array types[Raw values]"
)

## Saving 7 x 5 in image

## QC plot: raw intensities, negative-control spots only.
plotForegroundIntensitiesForCohort(
  RG.negatives,
  "Variation of log2 foreground intensities across negative control spots over all array types[Raw values]"
)

## Saving 7 x 5 in image

## QC plot: background-corrected, between-array normalised intensities.
## NOTE(review): the title says "quantile", but the method actually used is
## whatever limmaNormalizationMethod holds -- confirm they agree.
plotForegroundIntensitiesForCohort(
  RG.bc.nba,
  "Variation of log2 foreground intensities across all spots post background correction and quantile normalisation",
  FALSE
)

## Saving 7 x 5 in image

## NOTE(review): the title mentions control spots but the full RG object is
## passed -- confirm whether RG.negatives was intended.
plotForegroundIntensities(
  RG,
  "log2 foreground intensities of all  control spots"
)

## Using  as id variables

## Warning: Removed 127 rows containing non-finite values (stat_boxplot).

## Saving 7 x 5 in image

## Warning: Removed 127 rows containing non-finite values (stat_boxplot).

## Fit the group-means model, using the consensus duplicate correlation
## estimated earlier. `spacing` is stated explicitly so the duplicate layout
## matches the one given to duplicateCorrelation(). The full component name
## `consensus.correlation` is used rather than relying on `$` partial
## matching of `corfit$consensus`.
fit <- lmFit(RG.final,
             design,
             ndups = 2,
             spacing = 1,
             correlation = corfit$consensus.correlation)

## Re-express the fit in terms of the contrasts and moderate the statistics.
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)

## writeTopTable() is a helper defined outside this section.
limmatopTableShortlist <- writeTopTable(fit2)

## Feature annotation alongside the preprocessed expression matrix.
rawPreprocessedValues <- cbind(RG.final$genes, RG.final$E)


## Persist the top table and the preprocessed values.
write.csv(limmatopTableShortlist,
          paste0(outputDirectory, "limmatopTableShortlist.csv"),
          row.names = FALSE)
write.csv(rawPreprocessedValues,
          paste0(outputDirectory, "rawPreprocessedValues.csv"),
          row.names = FALSE)

## Tables are re-read from disk after writing.
## NOTE(review): presumably so column names pass through read.csv's name
## handling before being matched by limmatwoSampleRegexp -- confirm.
rawPreprocessedValues <- read.csv(paste0(outputDirectory, "rawPreprocessedValues.csv"))

## Keep the annotation columns plus the sample columns matching limmatwoSampleRegexp.
columnExtract <- paste("ID|Row|Column|Name|Block", limmatwoSampleRegexp, sep = "|")
preprocessedValuesSample <- rawPreprocessedValues[
  , grep(columnExtract, names(rawPreprocessedValues))
]
write.csv(preprocessedValuesSample,
          paste0(outputDirectory, "preprocessedValuesSample.csv"),
          row.names = FALSE)

limmatopTableShortlist <- read.csv(paste0(outputDirectory, "limmatopTableShortlist.csv"))
preprocessedValuesSample <- read.csv(paste0(outputDirectory, "preprocessedValuesSample.csv"))

## One top-table row per ID (first occurrence wins).
limmatopTableShortlist <- limmatopTableShortlist[
  !duplicated(limmatopTableShortlist$ID),
]

## Attach the limma statistics to the expression values of the same feature.
limmaOnlyExpressionValues <- merge(
  preprocessedValuesSample,
  limmatopTableShortlist,
  by = c("Block", "Row", "Column", "ID", "Name")
)
limmaOnlyExpressionValues <- limmaOnlyExpressionValues[
  !duplicated(limmaOnlyExpressionValues$ID),
]
write.csv(limmaOnlyExpressionValues,
          paste0(outputDirectory, "limmaOnlyExpressionValues.csv"),
          row.names = FALSE)
limmaOnlyExpressionValues <- read.csv(paste0(outputDirectory, "limmaOnlyExpressionValues.csv"))

## Keep features that pass the adjusted p-value and B-statistic cutoffs and
## whose log fold change exceeds limmalogFCcutoff in either direction.
limmaOnlyExpressionValues <- subset(
  limmaOnlyExpressionValues,
  limmaOnlyExpressionValues$adj.P.Val < limmaAdjPValueCutoff &
    limmaOnlyExpressionValues$B > limmaBValueCutoff &
    abs(limmaOnlyExpressionValues$logFC) > limmalogFCcutoff
)

## Full shortlist, including annotation and limma statistics.
write.csv(limmaOnlyExpressionValues,
          paste0(outputDirectory, "limma_shortlisted_expandedinfo.csv"),
          row.names = FALSE)

## Strip annotation and statistic columns, leaving ID plus the sample columns.
limmaOnlyExpressionValues <- limmaOnlyExpressionValues[
  ,
  !(names(limmaOnlyExpressionValues) %in%
      c("Name", "Block", "Row", "Column",
        "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B")),
  drop = FALSE
]

write.csv(limmaOnlyExpressionValues,
          paste0(outputDirectory, "limma_shortlisted.csv"),
          row.names = FALSE)
limmaOnlyExpressionValues <- read.csv(paste0(outputDirectory, "limma_shortlisted.csv"))

## Index the shortlist by feature ID so selected features can be looked up by name.
rownames(limmaOnlyExpressionValues) <- limmaOnlyExpressionValues$ID
output <- paste(outputDirectory, "limma_shortlisted.csv", sep = "")
svminput <- paste(outputDirectory, "svm_input.csv", sep = "")

## Python interpreter for the helper scripts. Defaults to the original
## hard-coded location; set the PYTHON_BIN environment variable to override.
pythonBin <- Sys.getenv("PYTHON_BIN", unset = "/home/saket/anaconda/bin/python")

## Run a shell command and stop if it exits non-zero, so a failed step cannot
## be masked by an output file left over from an earlier run.
runShellCommand <- function(command) {
  status <- system(command)
  if (status != 0) {
    stop("Command failed with exit status ", status, ": ", command,
         call. = FALSE)
  }
  invisible(status)
}

## Transpose the shortlist; the script writes "<input>_transpose.csv".
## Paths are shell-quoted so directories containing spaces work.
cmd <- paste(shQuote(pythonBin), "transpose.py", shQuote(output), sep = " ")
runShellCommand(cmd)
fn <- read.csv(paste(output, "_transpose.csv", sep = ""))

## Derive class labels from the sample identifiers held in the ID column.
fn$Labels <- fn$ID
fn$Labels <- gsub(limmaSampleRegexp1, "Control", fn$Labels)
fn$Labels <- gsub(limmaSampleRegexp2, "Disease", fn$Labels)
fn$ID <- NULL
write.csv(fn, svminput, row.names = FALSE)

## First pass: run orange_results.py on the SVM input alone.
cmd1 <- paste(shQuote(pythonBin), "orange_results.py", shQuote(svminput), sep = " ")
runShellCommand(cmd1)

## Second pass: rerun with the feature-ranking JSON, capturing stdout as CSV.
svmJson <- paste(substr(svminput, 1, nchar(svminput) - 4), "_average_feature_rankings.json", sep = "")
svmOutput <- paste(svminput, "_output.csv", sep = "")
cmd2 <- paste(shQuote(pythonBin), "orange_results.py", shQuote(svminput), shQuote(svmJson), ">", shQuote(svmOutput), sep = "  ")
runShellCommand(cmd2)

output <- read.csv(svmOutput)
## Columns 1, 2 and 12 are kept by position; the code below reads a column
## named FeatureList from the result.
output <- output[, c(1, 2, 12)]
#kable(output)

## The feature list is taken from row 30 of the results table.
## NOTE(review): row 30 is hard-coded; confirm it is the intended model size.
featureRow <- 30
if (nrow(output) < featureRow) {
  stop("Expected at least ", featureRow, " rows in ", svmOutput,
       " but found ", nrow(output), call. = FALSE)
}
selectedGenes <- strsplit(as.character(output[featureRow, ]$FeatureList), split = " ")
shortlisted <- limmaOnlyExpressionValues[c(selectedGenes[[1]]), ]

## NOTE(review): `varnames` applies to reshape2's array/matrix melt method;
## for a data frame it appears to have no effect -- confirm before removing.
melted <- melt(shortlisted, varnames = c("symbol", "sample"))

## Using ID as id variables

## Map each sample column name to its group label via the two sample regexps.
melted$factor <- melted$variable
melted$factor <- gsub(limmaSampleRegexp1, limmaSample1, melted$factor)
melted$factor <- gsub(limmaSampleRegexp2, limmaSample2, melted$factor)

## One panel per feature ID: expression value by group, points coloured by group.
gg <- ggplot(melted) +
  aes(x = factor, y = (value)) +
  geom_point(aes(color = factor(factor))) +
  facet_wrap(~ID)
print(gg)

## Render the results table.
knitr::kable(output)

NF	Brier	FeatureList
1	0.3101087	NM_007280.1
2	0.1898314	NM_007280.1 NM_001025436.1
3	0.1019387	NM_007280.1 NM_001025436.1 NM_032838.2
4	0.1001704	NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2
5	0.1405390	NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1
6	0.1136362	NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3
7	0.1369756	NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4
8	0.1486755	NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2
9	0.0856016	NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1
10	0.0916005	NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2
11	0.0829444	NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1
12	0.0768960	NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1
13	0.0808394	NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1
14	0.0841105	NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2
15	0.0679298	NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2
16	0.0320617	NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2
17	0.0279850	NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2 NM_001003799.1
18	0.0645226	NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2 NM_001003799.1 NM_002771.2
19	0.0827627	NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2 NM_001003799.1 NM_002771.2 BC072657
20	0.0905909	NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2 NM_001003799.1 NM_002771.2 BC072657 BC008605.1
21	0.0466633	NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2 NM_001003799.1 NM_002771.2 BC072657 BC008605.1 BC043488.1
22	0.0487574	NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2 NM_001003799.1 NM_002771.2 BC072657 BC008605.1 BC043488.1 NM_003616.2
23	0.0922666	NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2 NM_001003799.1 NM_002771.2 BC072657 BC008605.1 BC043488.1 NM_003616.2 NM_000546.2
24	0.0845409	NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2 NM_001003799.1 NM_002771.2 BC072657 BC008605.1 BC043488.1 NM_003616.2 NM_000546.2 BC020677.1
25	0.0962817	NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2 NM_001003799.1 NM_002771.2 BC072657 BC008605.1 BC043488.1 NM_003616.2 NM_000546.2 BC020677.1 NM_001551.2
26	0.0802237	NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2 NM_001003799.1 NM_002771.2 BC072657 BC008605.1 BC043488.1 NM_003616.2 NM_000546.2 BC020677.1 NM_001551.2 BC015848.1
27	0.1545900	NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2 NM_001003799.1 NM_002771.2 BC072657 BC008605.1 BC043488.1 NM_003616.2 NM_000546.2 BC020677.1 NM_001551.2 BC015848.1 NM_207521.1
28	0.0580788	NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2 NM_001003799.1 NM_002771.2 BC072657 BC008605.1 BC043488.1 NM_003616.2 NM_000546.2 BC020677.1 NM_001551.2 BC015848.1 NM_207521.1 NM_080473.3
29	0.0938396	NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2 NM_001003799.1 NM_002771.2 BC072657 BC008605.1 BC043488.1 NM_003616.2 NM_000546.2 BC020677.1 NM_001551.2 BC015848.1 NM_207521.1 NM_080473.3 NM_052958.1
30	0.0566329	NM_007280.1 NM_001025436.1 NM_032838.2 NM_016172.2 BC028152.1 NM_017721.3 NM_022137.4 NM_144607.2 XM_379897.1 NM_183006.2 BC011991.1 BC067301.1 NM_006304.1 BC000765.2 NM_012280.2 NM_007045.2 NM_001003799.1 NM_002771.2 BC072657 BC008605.1 BC043488.1 NM_003616.2 NM_000546.2 BC020677.1 NM_001551.2 BC015848.1 NM_207521.1 NM_080473.3 NM_052958.1 NM_032472.3

HC vs MG1 Sex Corrected 10 Control Samples

Saket Choudhary

October 26, 2015