HC vs MG1 No Sex Correction 15 Control Samples

library(reshape2)
library(ggplot2)
library(limma)
library(knitr)

targets <- read.csv(targetFile, header=T)
f <- factor(targets$Condition, levels = unique(targets$Condition))
#f<-paste(targets$Condition, targets$Sex, sep=".")
#f<-factor(f,levels=c("Control.Male","Control.Female","MGGrade1.Male", "MGGrade1.Female", "MGGrade2.Male",  "MGGrade2.Female"))
design <- model.matrix(~0+f)

colnames(design) <- levels(f)
cont.matrix <- makeContrasts(HCvsMG1_noSexCorrected=(MGGrade1)-(Control),
                             HCvsMG2_noSexCorrected=(MGGrade2)-(Control),
                             HCvsMG_noSexCorrected= MGGrade2/2+MGGrade1/2 - Control,
                             levels=design
                             )

## Read images treating G,Gb as Cy5,Cy5b as explained in the beginning
RG <- read.maimages( targets$FileName,
                     source="genepix.custom", 
                     green.only=TRUE, 
                     columns=list(G=limmaCy5,Gb=limmaCy5b)
      )

## Custom background: LocalFeature 
## Read Control_H-02_2000153953.gpr 
## Custom background: LocalFeature 
## Read Control_H-03_2000153943.gpr 
## Custom background: LocalFeature 
## Read Control_H-19_2000154008.gpr 
## Custom background: LocalFeature 
## Read Control_H-23_2000154009.gpr 
## Custom background: LocalFeature 
## Read Control_H-25_2000153952.gpr 
## Custom background: LocalFeature 
## Read Control_H-35_2000153942.gpr 
## Custom background: LocalFeature 
## Read Control_H-41_2000154026.gpr 
## Custom background: LocalFeature 
## Read Control_H-58_2000153935.gpr 
## Custom background: LocalFeature 
## Read Control_H-59_2000154016.gpr 
## Custom background: LocalFeature 
## Read Control_HC-25_2000153932.gpr 
## Custom background: LocalFeature 
## Read Control_HV-56_2000155735.gpr 
## Custom background: LocalFeature 
## Read Control_HV-59_2000155740.gpr 
## Custom background: LocalFeature 
## Read Control_HV-64_2000144456.gpr 
## Custom background: LocalFeature 
## Read Control_HV-70_2000144457.gpr 
## Custom background: LocalFeature 
## Read Control_HV-71_2000144458.gpr 
## Custom background: LocalFeature 
## Read MG_Grade_II_CH_17967_2000153914.gpr 
## Custom background: LocalFeature 
## Read MG_Grade_II_CJ_15491_2000153803.gpr 
## Custom background: LocalFeature 
## Read MG_Grade_II_CJ_15753_2000153802.gpr 
## Custom background: LocalFeature 
## Read MG_Grade_II_CJ_3577_2000153910.gpr 
## Custom background: LocalFeature 
## Read MG_Grade_II_CK_7710_2000153907.gpr 
## Custom background: LocalFeature 
## Read MG_Grade_I_CF_4450_2000153801.gpr 
## Custom background: LocalFeature 
## Read MG_Grade_I_CH_24953_2000153787.gpr 
## Custom background: LocalFeature 
## Read MG_Grade_I_CJ_14742_2000153789.gpr 
## Custom background: LocalFeature 
## Read MG_Grade_I_CJ_20619_2000153788.gpr 
## Custom background: LocalFeature 
## Read MG_Grade_I_CJ_26538_2000153915.gpr 
## Custom background: LocalFeature 
## Read MG_Grade_I_CJ_29452_2000153786.gpr 
## Custom background: LocalFeature 
## Read MG_Grade_I_CJ_29583_2000153908.gpr 
## Custom background: LocalFeature 
## Read MG_Grade_I_CJ_29822_2000153909.gpr 
## Custom background: LocalFeature 
## Read MG_Grade_I_CJ_4231_2000153790.gpr 
## Custom background: LocalFeature 
## Read MG_Grade_I_CJ_9179_2000153906.gpr

rownames(RG) <- RG$genes$ID
RG <- RG[order(rownames(RG)), ]

types <- data.frame(SpotType=c("Gene","Negative"),
                    ID=c("*","*"), 
                    Name=c("*", limmaNegativeControlsRegexp), 
                    col=c("blue", "red")                  
                    )
status <- controlStatus(types, RG$genes)

## Matching patterns for: ID Name 
## Found 38400 Gene 
## Found 192 Negative 
## Setting attributes: values col

filterByID <-  grep(limmaControlsRegexp, RG$genes[,"ID"])
filterByName <-  grep(limmaControlsRegexp, RG$genes[,"Name"])
filterByNameAndID <- union(filterByID, filterByName)
RG.bc.nec <- nec(RG, 
               status=status, 
               negctrl="Negative", 
               regular="Gene", 
               robust=FALSE, 
               offset=limmaBackgroundOffset
               )

RG.bc.nba <-  normalizeBetweenArrays(RG.bc.nec, method=limmaNormalizationMethod)
RG.final <- RG.bc.nba[-filterByNameAndID,]
negatives<- grep("empty|Empty|CONTROL|BSA|Hela cell lysate|p300-BHC",RG$genes[,"ID"])
RG.negatives <- RG[negatives, ]

corfit <- duplicateCorrelation(RG.final, 
                               design, 
                               ndups=2, 
                               spacing=1
                               )
plotForegroundIntensitiesForCohort(RG, "Variation of log2 foreground intensities across  all spots of all array types[Raw values]")

## Saving 7 x 5 in image

plotForegroundIntensitiesForCohort(RG.negatives, "Variation of log2 foreground intensities across negative control spots over all array types[Raw values]")

## Saving 7 x 5 in image

plotForegroundIntensitiesForCohort(RG.bc.nba, "Variation of log2 foreground intensities across all spots post background correction and quantile normalisation", FALSE)

## Saving 7 x 5 in image

plotForegroundIntensities(RG, "log2 foreground intensities of all  control spots")

## Using  as id variables

## Warning: Removed 127 rows containing non-finite values (stat_boxplot).

## Saving 7 x 5 in image

## Warning: Removed 127 rows containing non-finite values (stat_boxplot).

fit <- lmFit(RG.final, 
             design, 
             ndups=2, 
             correlation=corfit$consensus)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
limmatopTableShortlist <- writeTopTable(fit2)
rawPreprocessedValues <- cbind(RG.final$genes, RG.final$E)


write.csv(limmatopTableShortlist, paste(outputDirectory, "limmatopTableShortlist.csv", sep=""), row.names=FALSE)
write.csv(rawPreprocessedValues, paste(outputDirectory, "rawPreprocessedValues.csv", sep=""), row.names=FALSE)

rawPreprocessedValues <- read.csv(paste(outputDirectory, "rawPreprocessedValues.csv", sep=""))
columnExtract<- paste("ID|Row|Column|Name|Block", limmatwoSampleRegexp, sep="|")
preprocessedValuesSample <- rawPreprocessedValues[, grep(columnExtract,names(rawPreprocessedValues))]
write.csv(preprocessedValuesSample, paste(outputDirectory, "preprocessedValuesSample.csv", sep=""), row.names=FALSE)
limmatopTableShortlist <- read.csv(paste(outputDirectory, "limmatopTableShortlist.csv", sep=""))
preprocessedValuesSample <- read.csv(paste(outputDirectory, "preprocessedValuesSample.csv", sep=""))
limmatopTableShortlist <- limmatopTableShortlist[!duplicated(limmatopTableShortlist$ID),]
limmaOnlyExpressionValues <- merge(preprocessedValuesSample, 
                limmatopTableShortlist, 
                by=c("Block","Row","Column","ID","Name"))
limmaOnlyExpressionValues <- limmaOnlyExpressionValues[!duplicated(limmaOnlyExpressionValues$ID),]
write.csv(limmaOnlyExpressionValues, paste(outputDirectory, "limmaOnlyExpressionValues.csv", sep=""), row.names=FALSE)
limmaOnlyExpressionValues <- read.csv(paste(outputDirectory, "limmaOnlyExpressionValues.csv", sep=""))

limmaOnlyExpressionValues <- subset(limmaOnlyExpressionValues, 
                                    limmaOnlyExpressionValues$adj.P.Val < limmaAdjPValueCutoff & limmaOnlyExpressionValues$B>limmaBValueCutoff )
limmaOnlyExpressionValues <- subset(limmaOnlyExpressionValues, limmaOnlyExpressionValues$logFC> limmalogFCcutoff | limmaOnlyExpressionValues$logFC< - limmalogFCcutoff)

write.csv(limmaOnlyExpressionValues, paste(outputDirectory, "limma_shortlisted_expandedinfo.csv", sep=""), row.names=FALSE)
limmaOnlyExpressionValues$Name<-NULL
limmaOnlyExpressionValues$Block<-NULL
limmaOnlyExpressionValues$Row<-NULL
limmaOnlyExpressionValues$Column<-NULL
limmaOnlyExpressionValues$logFC <-NULL
limmaOnlyExpressionValues$AveExpr <- NULL
limmaOnlyExpressionValues$t <- NULL
limmaOnlyExpressionValues["P.Value"] <- NULL
limmaOnlyExpressionValues["adj.P.Val"] <- NULL
limmaOnlyExpressionValues["B"] <- NULL

write.csv(limmaOnlyExpressionValues, paste(outputDirectory, "limma_shortlisted.csv",sep=""), row.names=FALSE)
limmaOnlyExpressionValues <- read.csv( paste(outputDirectory, "limma_shortlisted.csv",sep=""))

rownames(limmaOnlyExpressionValues) <- limmaOnlyExpressionValues$ID
output <- paste(outputDirectory, "limma_shortlisted.csv",sep="")
svminput <- paste(outputDirectory, "svm_input.csv", sep="")
cmd <- paste("/home/saket/anaconda/bin/python", "transpose.py", output, sep=" ")
system(cmd)
fn <- read.csv(paste(output, "_transpose.csv", sep=""))
fn$Labels <- fn$ID
fn$Labels <- gsub(limmaSampleRegexp1, "Control", fn$Labels)
fn$Labels <- gsub(limmaSampleRegexp2, "Disease", fn$Labels)
fn$ID <- NULL
write.csv(fn, svminput, row.names=FALSE)
cmd1 <- paste("/home/saket/anaconda/bin/python","orange_results.py", svminput, sep=" ")
system(cmd1)
svmJson  <- paste(substr(svminput,1,nchar(svminput)-4), "_average_feature_rankings.json", sep="")
svmOutput <- paste(svminput, "_output.csv", sep="")
cmd2 <- paste("/home/saket/anaconda/bin/python", "orange_results.py", svminput, svmJson, ">", svmOutput, sep = "  ")
system(cmd2)
output <- read.csv(svmOutput)
output <- output[,c(1,2,12)]
#kable(output)
#which.min(output$Brier)
selectedGenes <- strsplit(as.character(output[30,]$FeatureList), split=" ") 
shortlisted <- limmaOnlyExpressionValues[c(selectedGenes[[1]] ),]
melted <- melt(shortlisted, varnames=c("symbol", "sample"))

## Using ID as id variables

melted$factor<- melted$variable
melted$factor <- gsub(limmaSampleRegexp1, limmaSample1, melted$factor)
melted$factor <- gsub(limmaSampleRegexp2, limmaSample2, melted$factor)
gg = ggplot(melted) + aes(x=factor, y=(value)) + geom_point(aes(color =factor(factor)) ) + facet_wrap(~ID)
print(gg)

knitr::kable(output)

NF	Brier	FeatureList
1	0.0144722	NM_001014444.1
2	0.0104859	NM_001014444.1 BC025985.1
3	0.0266607	NM_001014444.1 BC025985.1 NM_001025266.1
4	0.0104986	NM_001014444.1 BC025985.1 NM_001025266.1 BC006453.1
5	0.0105765	NM_001014444.1 BC025985.1 NM_001025266.1 BC006453.1 NM_001042476.1
6	0.0308628	NM_001014444.1 BC025985.1 NM_001025266.1 BC006453.1 NM_001042476.1 NM_004362.1
7	0.0294568	NM_001014444.1 BC025985.1 NM_001025266.1 BC006453.1 NM_001042476.1 NM_004362.1 NM_198086.1
8	0.0136500	NM_001014444.1 BC025985.1 NM_001025266.1 BC006453.1 NM_001042476.1 NM_004362.1 NM_198086.1 BC132786.1
9	0.0124763	NM_001014444.1 BC025985.1 NM_001025266.1 BC006453.1 NM_001042476.1 NM_004362.1 NM_198086.1 BC132786.1 BC002448.2
10	0.0297463	NM_001014444.1 BC025985.1 NM_001025266.1 BC006453.1 NM_001042476.1 NM_004362.1 NM_198086.1 BC132786.1 BC002448.2 BC004130.2
11	0.0169075	NM_001014444.1 BC025985.1 NM_001025266.1 BC006453.1 NM_001042476.1 NM_004362.1 NM_198086.1 BC132786.1 BC002448.2 BC004130.2 BC062437.1
12	0.0162480	NM_001014444.1 BC025985.1 NM_001025266.1 BC006453.1 NM_001042476.1 NM_004362.1 NM_198086.1 BC132786.1 BC002448.2 BC004130.2 BC062437.1 NM_176823.2
13	0.0122454	NM_001014444.1 BC025985.1 NM_001025266.1 BC006453.1 NM_001042476.1 NM_004362.1 NM_198086.1 BC132786.1 BC002448.2 BC004130.2 BC062437.1 NM_176823.2 NM_148910.2
14	0.0113116	NM_001014444.1 BC025985.1 NM_001025266.1 BC006453.1 NM_001042476.1 NM_004362.1 NM_198086.1 BC132786.1 BC002448.2 BC004130.2 BC062437.1 NM_176823.2 NM_148910.2 NM_014372.3
15	0.0119256	NM_001014444.1 BC025985.1 NM_001025266.1 BC006453.1 NM_001042476.1 NM_004362.1 NM_198086.1 BC132786.1 BC002448.2 BC004130.2 BC062437.1 NM_176823.2 NM_148910.2 NM_014372.3 BC006296.2
16	0.0154521	NM_001014444.1 BC025985.1 NM_001025266.1 BC006453.1 NM_001042476.1 NM_004362.1 NM_198086.1 BC132786.1 BC002448.2 BC004130.2 BC062437.1 NM_176823.2 NM_148910.2 NM_014372.3 BC006296.2 NM_001157.2
17	0.0114240	NM_001014444.1 BC025985.1 NM_001025266.1 BC006453.1 NM_001042476.1 NM_004362.1 NM_198086.1 BC132786.1 BC002448.2 BC004130.2 BC062437.1 NM_176823.2 NM_148910.2 NM_014372.3 BC006296.2 NM_001157.2 NM_015464.2
18	0.0114022	NM_001014444.1 BC025985.1 NM_001025266.1 BC006453.1 NM_001042476.1 NM_004362.1 NM_198086.1 BC132786.1 BC002448.2 BC004130.2 BC062437.1 NM_176823.2 NM_148910.2 NM_014372.3 BC006296.2 NM_001157.2 NM_015464.2 NM_002767.2
19	0.0099919	NM_001014444.1 BC025985.1 NM_001025266.1 BC006453.1 NM_001042476.1 NM_004362.1 NM_198086.1 BC132786.1 BC002448.2 BC004130.2 BC062437.1 NM_176823.2 NM_148910.2 NM_014372.3 BC006296.2 NM_001157.2 NM_015464.2 NM_002767.2 NM_004147.3
20	0.0120992	NM_001014444.1 BC025985.1 NM_001025266.1 BC006453.1 NM_001042476.1 NM_004362.1 NM_198086.1 BC132786.1 BC002448.2 BC004130.2 BC062437.1 NM_176823.2 NM_148910.2 NM_014372.3 BC006296.2 NM_001157.2 NM_015464.2 NM_002767.2 NM_004147.3 BC080607.1
21	0.0143608	NM_001014444.1 BC025985.1 NM_001025266.1 BC006453.1 NM_001042476.1 NM_004362.1 NM_198086.1 BC132786.1 BC002448.2 BC004130.2 BC062437.1 NM_176823.2 NM_148910.2 NM_014372.3 BC006296.2 NM_001157.2 NM_015464.2 NM_002767.2 NM_004147.3 BC080607.1 NM_021810.3
22	0.0135547	NM_001014444.1 BC025985.1 NM_001025266.1 BC006453.1 NM_001042476.1 NM_004362.1 NM_198086.1 BC132786.1 BC002448.2 BC004130.2 BC062437.1 NM_176823.2 NM_148910.2 NM_014372.3 BC006296.2 NM_001157.2 NM_015464.2 NM_002767.2 NM_004147.3 BC080607.1 NM_021810.3 NM_001827.1
23	0.0102568	NM_001014444.1 BC025985.1 NM_001025266.1 BC006453.1 NM_001042476.1 NM_004362.1 NM_198086.1 BC132786.1 BC002448.2 BC004130.2 BC062437.1 NM_176823.2 NM_148910.2 NM_014372.3 BC006296.2 NM_001157.2 NM_015464.2 NM_002767.2 NM_004147.3 BC080607.1 NM_021810.3 NM_001827.1 NM_032328.1
24	0.0118713	NM_001014444.1 BC025985.1 NM_001025266.1 BC006453.1 NM_001042476.1 NM_004362.1 NM_198086.1 BC132786.1 BC002448.2 BC004130.2 BC062437.1 NM_176823.2 NM_148910.2 NM_014372.3 BC006296.2 NM_001157.2 NM_015464.2 NM_002767.2 NM_004147.3 BC080607.1 NM_021810.3 NM_001827.1 NM_032328.1 NM_006163.1
25	0.0134662	NM_001014444.1 BC025985.1 NM_001025266.1 BC006453.1 NM_001042476.1 NM_004362.1 NM_198086.1 BC132786.1 BC002448.2 BC004130.2 BC062437.1 NM_176823.2 NM_148910.2 NM_014372.3 BC006296.2 NM_001157.2 NM_015464.2 NM_002767.2 NM_004147.3 BC080607.1 NM_021810.3 NM_001827.1 NM_032328.1 NM_006163.1 BC027881.1
26	0.0184920	NM_001014444.1 BC025985.1 NM_001025266.1 BC006453.1 NM_001042476.1 NM_004362.1 NM_198086.1 BC132786.1 BC002448.2 BC004130.2 BC062437.1 NM_176823.2 NM_148910.2 NM_014372.3 BC006296.2 NM_001157.2 NM_015464.2 NM_002767.2 NM_004147.3 BC080607.1 NM_021810.3 NM_001827.1 NM_032328.1 NM_006163.1 BC027881.1 ENST00000362035
27	0.0126352	NM_001014444.1 BC025985.1 NM_001025266.1 BC006453.1 NM_001042476.1 NM_004362.1 NM_198086.1 BC132786.1 BC002448.2 BC004130.2 BC062437.1 NM_176823.2 NM_148910.2 NM_014372.3 BC006296.2 NM_001157.2 NM_015464.2 NM_002767.2 NM_004147.3 BC080607.1 NM_021810.3 NM_001827.1 NM_032328.1 NM_006163.1 BC027881.1 ENST00000362035 BC026107.2
28	0.0136508	NM_001014444.1 BC025985.1 NM_001025266.1 BC006453.1 NM_001042476.1 NM_004362.1 NM_198086.1 BC132786.1 BC002448.2 BC004130.2 BC062437.1 NM_176823.2 NM_148910.2 NM_014372.3 BC006296.2 NM_001157.2 NM_015464.2 NM_002767.2 NM_004147.3 BC080607.1 NM_021810.3 NM_001827.1 NM_032328.1 NM_006163.1 BC027881.1 ENST00000362035 BC026107.2 NM_001033515.1
29	0.0166493	NM_001014444.1 BC025985.1 NM_001025266.1 BC006453.1 NM_001042476.1 NM_004362.1 NM_198086.1 BC132786.1 BC002448.2 BC004130.2 BC062437.1 NM_176823.2 NM_148910.2 NM_014372.3 BC006296.2 NM_001157.2 NM_015464.2 NM_002767.2 NM_004147.3 BC080607.1 NM_021810.3 NM_001827.1 NM_032328.1 NM_006163.1 BC027881.1 ENST00000362035 BC026107.2 NM_001033515.1 NM_000184.2
30	0.0129795	NM_001014444.1 BC025985.1 NM_001025266.1 BC006453.1 NM_001042476.1 NM_004362.1 NM_198086.1 BC132786.1 BC002448.2 BC004130.2 BC062437.1 NM_176823.2 NM_148910.2 NM_014372.3 BC006296.2 NM_001157.2 NM_015464.2 NM_002767.2 NM_004147.3 BC080607.1 NM_021810.3 NM_001827.1 NM_032328.1 NM_006163.1 BC027881.1 ENST00000362035 BC026107.2 NM_001033515.1 NM_000184.2 NM_032380.3

HC vs MG1 No Sex Correction 15 Control Samples

Saket Choudhary

October 26, 2015