Importing necessary packages…
require(WGCNA)
source("WGCNA_accessory_functions.R") ##Found in supplementary files
require(flashClust)
A helper function for data processing. Removes the first column (gene names) and makes it the row names of the resulting, scaled matrix.
process = function(dat){
# saving gene names
mapping = as.numeric(dat[, 1])
# removing gene names from main matrix
hvals = (dat[, -1])
# gene names now row names
rownames(hvals) = mapping
hvals = as.matrix(hvals)
hvals = scale(hvals)
return(hvals)
}
Using ‘process’ function, load in the six matrices of interest
# A is for healthy, B for asthma
# GSE64913
datExprA1 = process(read.csv("healthy.csv"))
datExprB1 = process(read.csv("disease.csv"))
# GSE63142
datExprA2 = process(read.csv("healthy63.csv"))
datExprB2 = process(read.csv("disease63.csv"))
# GSE65204
datExprA3 = process(read.csv("healthy65.csv"))
datExprB3 = process(read.csv("disease65.csv"))
We sample for only the genes that are suitable for analysis based on their expression in all three matrices.
c = goodSamplesGenes(t(datExprA1), verbose = 0)
datExprA1 = datExprA1[c$goodGenes,]
c = goodSamplesGenes(t(datExprA2), verbose = 0)
datExprA2 = datExprA2[c$goodGenes,]
c = goodSamplesGenes(t(datExprA3), verbose = 0)
datExprA3 = datExprA3[c$goodGenes,]
commonProbesA = intersect(intersect (rownames(datExprA1),rownames(datExprA2)), rownames(datExprA3))
datExprA1g = datExprA1[commonProbesA,]
datExprA2g = datExprA2[commonProbesA,]
datExprA3g = datExprA3[commonProbesA,]
c = NULL
c = goodSamplesGenes(t(datExprB1), verbose = 0)
datExprB1 = datExprB1[c$goodGenes,]
c = goodSamplesGenes(t(datExprB2), verbose = 0)
datExprB2 = datExprB2[c$goodGenes,]
c = goodSamplesGenes(t(datExprA3), verbose = 0)
datExprB3 = datExprB3[c$goodGenes,]
commonProbesA = intersect(intersect (rownames(datExprB1),rownames(datExprB2)), rownames(datExprB3))
datExprB1g = datExprB1[commonProbesA,]
datExprB2g = datExprB2[commonProbesA,]
datExprB3g = datExprB3[commonProbesA,]
print(dim(datExprA1g))
## [1] 12806 23
require(MetaIntegrator)
data = getGEOData(c( "GSE65204", "GSE64913", "GSE63142"))
## Warning: 62 parsing failures.
## row col expected actual file
## 54614 SPOT_ID 1/0/T/F/TRUE/FALSE --Control literal data
## 54615 SPOT_ID 1/0/T/F/TRUE/FALSE --Control literal data
## 54616 SPOT_ID 1/0/T/F/TRUE/FALSE --Control literal data
## 54617 SPOT_ID 1/0/T/F/TRUE/FALSE --Control literal data
## 54618 SPOT_ID 1/0/T/F/TRUE/FALSE --Control literal data
## ..... ....... .................. ......... ............
## See problems(...) for more details.
## Warning: 94 parsing failures.
## row col expected actual file
## 41001 CONTROL_TYPE 1/0/T/F/TRUE/FALSE pos literal data
## 41002 CONTROL_TYPE 1/0/T/F/TRUE/FALSE pos literal data
## 41003 CONTROL_TYPE 1/0/T/F/TRUE/FALSE pos literal data
## 41004 CONTROL_TYPE 1/0/T/F/TRUE/FALSE pos literal data
## 41005 CONTROL_TYPE 1/0/T/F/TRUE/FALSE pos literal data
## ..... ............ .................. ...... ............
## See problems(...) for more details.
## Length Class Mode
## GSE65204 1 -none- list
## GSE64913 1 -none- list
## GSE63142 1 -none- list
## [1] "GSE65204_series_matrix.txt.gz"
## [1] "GSE64913_series_matrix.txt.gz"
## [1] "GSE63142_series_matrix.txt.gz"
##Cleaning up a weird dataset
gse65204 = read.csv("gse65204.csv")
classes = gse65204[8,]
classes = classes[, -1]
temp = unlist(classes)
for(i in 1:length(classes)){
if(grepl("TRUE", temp[i]) == TRUE){
classes[i] = 1
}else{
classes[i] = 0
}
}
data$originalData$GSE65204$class = unlist(classes)
data$originalData$GSE64913 = classFunction(data$originalData$GSE64913, column = "diagnosis:ch1", diseaseTerms = c("Severe Asthmatic"))##OK....all these datasets are weird
#data$originalData$GSE35571 = classFunction(data$originalData$GSE35571, column = "disease status:ch1", diseaseTerms = c("asthmatic"))
data$originalData$GSE63142 = classFunction(data$originalData$GSE63142, column = "individual:ch1", diseaseTerms = c("severe asthmatic", "not severe asthmatic"))
dataObj = data$originalData$GSE63142
dataObj$pheno$group = as.numeric(dataObj$class)
data$originalData$GSE63142 = NULL
analysis = runMetaAnalysis(data, runLeaveOneOutAnalysis = F)
## Computing effect sizes...
## Computing summary effect sizes...
## Computing Fisher's output...
analysis = filterGenes(analysis, isLeaveOneOut = F, effectSizeThresh = 0, FDRThresh = 0.1)
ROC Plot of our independent cohort
rocPlot(analysis$filterResults$FDR0.1_es0_nStudies1_looaFALSE_hetero0, dataObj)
## Used 589 of 589 pos genes, and 422 of 422 neg genes
## For dataset GSE63142, AUC = 0.844
Soft thresholding is conducted by iterating through certain exponent values (1-10, 12, 14, … 20). For each exponent, the resulting matrix is compared to a scale-free topology, and the R^2 of correlation is plotted against the exponent. We seek the exponent with the highest R^2 so we can scale our resulting matrix.
cex1 = 0.9;
powers = c(c(1:10), seq(from = 12, to=20, by=2))
sft = pickSoftThreshold(t(datExprA1g), powerVector = powers, verbose = 0)
## Warning: executing %dopar% sequentially: no parallel backend registered
## Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1 1 0.0135 -0.561 0.928 2740.0 2680.000 4250
## 2 2 0.3010 -1.600 0.929 907.0 842.000 2000
## 3 3 0.6010 -1.910 0.922 379.0 324.000 1100
## 4 4 0.7280 -1.890 0.810 186.0 144.000 683
## 5 5 0.8290 -1.800 0.781 103.0 70.500 504
## 6 6 0.7850 -1.940 0.738 63.7 37.000 461
## 7 7 0.7370 -1.860 0.732 43.2 20.500 436
## 8 8 0.7030 -1.720 0.759 31.7 11.900 420
## 9 9 0.6650 -1.560 0.801 24.9 7.200 408
## 10 10 0.6460 -1.440 0.846 20.7 4.450 399
## 11 12 0.6150 -1.240 0.907 16.0 1.870 386
## 12 14 0.6020 -1.130 0.925 13.6 0.839 376
## 13 16 0.6110 -1.060 0.928 12.3 0.403 368
## 14 18 0.6210 -1.030 0.924 11.4 0.206 362
## 15 20 0.6360 -0.991 0.925 10.8 0.109 356
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="signed R^2",type="n",
main = paste("GSE64913, Healthy"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=cex1,col="red");
abline(h=0.80,col="red")
sft = pickSoftThreshold(t(datExprA2g), powerVector = powers, verbose = 0)
## Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1 1 0.1410 2.49 0.994 2680.000 2.67e+03 3810.0
## 2 2 0.0149 -0.41 0.991 854.000 8.38e+02 1630.0
## 3 3 0.2380 -1.39 0.985 337.000 3.22e+02 830.0
## 4 4 0.4790 -1.88 0.990 153.000 1.40e+02 466.0
## 5 5 0.6180 -2.13 0.981 76.400 6.70e+01 281.0
## 6 6 0.7520 -2.01 0.991 41.300 3.43e+01 178.0
## 7 7 0.8540 -2.36 0.989 23.800 1.85e+01 144.0
## 8 8 0.9090 -2.46 0.978 14.500 1.05e+01 121.0
## 9 9 0.9460 -2.40 0.974 9.180 6.13e+00 103.0
## 10 10 0.9630 -2.31 0.971 6.070 3.71e+00 89.5
## 11 12 0.9670 -2.08 0.961 2.930 1.47e+00 69.6
## 12 14 0.9550 -1.92 0.950 1.580 6.30e-01 55.7
## 13 16 0.9610 -1.77 0.956 0.936 2.92e-01 45.5
## 14 18 0.9650 -1.66 0.962 0.594 1.42e-01 37.6
## 15 20 0.9580 -1.59 0.960 0.399 7.14e-02 31.5
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="signed R^2",type="n",
main = paste("GSE63142, Healthy"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=cex1,col="red");
abline(h=0.9,col="red")
sft = pickSoftThreshold(t(datExprA3g), powerVector = powers, verbose = 0)
## Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1 1 0.0434 0.890 0.975 2820.00 2.80e+03 4310.0
## 2 2 0.1160 -0.792 0.973 951.00 9.09e+02 2060.0
## 3 3 0.4820 -1.330 0.983 398.00 3.58e+02 1170.0
## 4 4 0.7510 -1.650 0.986 192.00 1.59e+02 749.0
## 5 5 0.8730 -1.780 0.988 103.00 7.70e+01 525.0
## 6 6 0.9300 -1.810 0.990 59.80 3.97e+01 392.0
## 7 7 0.9550 -1.810 0.989 37.10 2.15e+01 308.0
## 8 8 0.9620 -1.790 0.988 24.30 1.21e+01 253.0
## 9 9 0.9620 -1.760 0.983 16.60 7.11e+00 213.0
## 10 10 0.9580 -1.720 0.980 11.80 4.29e+00 183.0
## 11 12 0.9490 -1.640 0.975 6.57 1.69e+00 140.0
## 12 14 0.9050 -1.610 0.951 4.00 7.21e-01 111.0
## 13 16 0.9160 -1.550 0.964 2.62 3.28e-01 90.8
## 14 18 0.9250 -1.500 0.976 1.80 1.57e-01 75.5
## 15 20 0.9300 -1.470 0.984 1.29 7.88e-02 63.6
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="signed R^2",type="n",
main = paste("GSE65204, Healthy"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=cex1,col="red");
abline(h=0.92,col="red")
sft = pickSoftThreshold(t(datExprB1g), powerVector = powers, verbose = 0)
## Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1 1 0.00226 0.34 0.916 3280.0 3240.00 4360
## 2 2 0.14800 -1.81 0.806 1270.0 1230.00 2200
## 3 3 0.38900 -1.61 0.640 610.0 568.00 1320
## 4 4 0.60300 -2.05 0.565 335.0 299.00 919
## 5 5 0.71500 -2.33 0.633 203.0 171.00 740
## 6 6 0.73300 -2.37 0.684 134.0 104.00 638
## 7 7 0.72500 -2.28 0.738 93.7 67.00 577
## 8 8 0.69900 -2.14 0.761 69.6 44.90 537
## 9 9 0.67800 -1.98 0.790 54.2 30.80 510
## 10 10 0.66200 -1.84 0.800 44.0 21.80 491
## 11 12 0.63500 -1.63 0.807 32.0 11.60 466
## 12 14 0.64200 -1.50 0.798 25.7 6.67 450
## 13 16 0.63500 -1.38 0.808 22.0 4.01 439
## 14 18 0.64000 -1.33 0.793 19.6 2.54 431
## 15 20 0.64500 -1.26 0.801 18.1 1.66 424
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="signed R^2",type="n",
main = paste("GSE64913, Asthma"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=cex1,col="red");
abline(h=0.72,col="red")
sft = pickSoftThreshold(t(datExprB2g), powerVector = powers, verbose = 0)
## Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1 1 0.2240 2.210 0.972 2400.000 2.38e+03 3640.00
## 2 2 0.0233 -0.364 0.973 699.000 6.70e+02 1500.00
## 3 3 0.3440 -1.260 0.982 255.000 2.32e+02 730.00
## 4 4 0.6080 -1.730 0.987 107.000 9.22e+01 398.00
## 5 5 0.7140 -2.050 0.983 50.400 4.03e+01 233.00
## 6 6 0.7740 -2.140 0.987 25.600 1.90e+01 144.00
## 7 7 0.8130 -2.150 0.992 13.900 9.44e+00 92.00
## 8 8 0.8380 -2.120 0.990 7.990 4.91e+00 60.70
## 9 9 0.8590 -2.060 0.994 4.800 2.68e+00 41.30
## 10 10 0.8740 -2.000 0.991 3.000 1.50e+00 29.00
## 11 12 0.8420 -2.110 0.976 1.300 5.17e-01 17.10
## 12 14 0.8460 -2.190 0.986 0.627 1.93e-01 11.80
## 13 16 0.8840 -2.100 0.995 0.331 7.69e-02 8.44
## 14 18 0.9010 -1.990 0.987 0.189 3.19e-02 6.22
## 15 20 0.9300 -1.860 0.984 0.114 1.39e-02 4.69
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="signed R^2",type="n",
main = paste("GSE63142, Asthma"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=cex1,col="red");
abline(h=0.83,col="red")
sft = pickSoftThreshold(t(datExprB3g), powerVector = powers, verbose = 0)
## Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1 1 0.2630 2.330 0.956 2720.000 2.72e+03 3930.0
## 2 2 0.0213 -0.309 0.945 887.000 8.63e+02 1790.0
## 3 3 0.4100 -1.280 0.943 362.000 3.32e+02 1020.0
## 4 4 0.6800 -1.670 0.958 171.000 1.44e+02 652.0
## 5 5 0.8180 -1.850 0.966 89.900 6.81e+01 456.0
## 6 6 0.8810 -1.910 0.968 51.300 3.43e+01 338.0
## 7 7 0.9170 -1.900 0.973 31.300 1.82e+01 262.0
## 8 8 0.9420 -1.850 0.982 20.200 1.01e+01 209.0
## 9 9 0.9530 -1.800 0.984 13.600 5.83e+00 172.0
## 10 10 0.9570 -1.750 0.986 9.530 3.47e+00 144.0
## 11 12 0.9670 -1.640 0.993 5.130 1.33e+00 104.0
## 12 14 0.9700 -1.560 0.996 3.030 5.54e-01 79.3
## 13 16 0.9530 -1.530 0.983 1.920 2.44e-01 62.6
## 14 18 0.9550 -1.500 0.997 1.280 1.13e-01 50.4
## 15 20 0.9550 -1.470 0.998 0.892 5.46e-02 41.3
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="signed R^2",type="n",
main = paste("GSE65204, Asthma"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=cex1,col="red");
abline(h=0.90,col="red")
We use each of the optimal exponents to create our weighted adjacency matrices for clustering.
We first compute the adjacency matrices, with the powers we got from the previous analysis.
adjA1 = adjacency(t(datExprA1g), type = "signed", power = 5)
diag(adjA1)=0
adjA2 = adjacency(t(datExprA2g), type = "signed", power = 8)
diag(adjA2)=0
adjA3 = adjacency(t(datExprA3g), type = "signed", power = 6)
diag(adjA3)=0
adjB1 = adjacency(t(datExprB1g), type = "signed", power = 5)
diag(adjB1)=0
adjB2 = adjacency(t(datExprB2g), type = "signed", power = 7)
diag(adjB2)=0
adjB3 = adjacency(t(datExprB3g), type = "signed", power = 7)
diag(adjB3)=0
print(dim(adjA1))
## [1] 12806 12806
We define a function main that performs the clustering analysis from an adjacency matrix, and returns the clusters as well as the mappings between gene IDs and their respective cluster.
This function calculates the dissimilarity matrix of each of the adjusted adjacency matrices, which is computed by subtracting the Topological Overlap Matrix from 1. The resulting matrix is clustered by distance, which produces a dendrogram consisting of all the genes in the matrix.
To choose the number of clusters, we use a cut tree technique, that takes clusters that stem from a fixed value. The unique branches at that cutoff each make up one cluster, consisting of all the genes that appear below it.
main = function(adj){
dissTOMA1 = 1-TOMsimilarity(adj, TOMType="signed")
geneTree = flashClust(as.dist(dissTOMA1), method="average")
mColorh=NULL
for (ds in 0:3){
tree = cutreeHybrid(dendro = geneTree, pamStage=FALSE,
minClusterSize = (30-3*ds), cutHeight = 0.99,
deepSplit = ds, distM = dissTOMA1)
mColorh=cbind(mColorh,labels2colors(tree$labels));
}
modulesA1 = mColorh[,3]
require(reshape2)
ordergenes = geneTree$order
df = melt(data.frame(ordergenes, modulesA1))
df = df[, c(1, 3)]
return(list(modulesA1, df))
}
We define a function triple that determines module preservation statistics across a group of three modules.
Module preservation statistics are calculated for the identified modules using permutation test. Comparison of modules are made between all datasets, and permutation testing results return the Zsummary scores for every module.
triple = function(modulesA1, datExprA1g, datExprA2g, datExprA3g){
multiExpr = list(A1=list(data=t(datExprA1g)),A2=list(data=t(datExprA2g)), A3=list(data=t(datExprA3g)))
multiColor = list(A1 = modulesA1)
mp=modulePreservation(multiExpr,multiColor,referenceNetworks=1,verbose=3,networkType="signed",
nPermutations=50,maxGoldModuleSize=100,maxModuleSize=400)
stats = mp$preservation$Z$ref.A1$inColumnsAlsoPresentIn.A2
ranks = mp$preservation$observed$ref.A1$inColumnsAlsoPresentIn.A2
ranking = stats[order(-stats[,2]),c(1:2)]
return(ranking)
}
Here we take the adjacency matrices we computed and apply our previously defined functions.
We use the first dataset as our root dataset, but this doesn’t really matter since we are interested in the genes that stay preserved across all three sets. The order with which we arrive at these genes is not important.
This is the module preservation analysis of the three healthy matrices. We take all the modules that have Zsummary values greater than 10, as described in the Methods section.
This section of code returns the modules that show high preservation. It is returned in a list, where non-null entries contain a dataframe with the module color as well as the gene IDs corresponding to that module.
We provide the results from this analysis as a saved R file so that a user may forgo the timely permutation tests and other analyses. The module preservation takes some time.
modules = main(adjA1)
colormappings = modules[[2]]
modules = modules[[1]]
##Healthy Preservation
ranksA = triple(modules, datExprA1g, datExprA2g, datExprA3g)
colormappingsA = main(adjA1)[[2]]
#A means for all such healthy datasets
##higher Zsummary => more preservation
# Langfelder, P., Luo, R., Oldham, M. C., and Horvath, S.
highpres = ranksA[ranksA$Zsummary.pres > 10,]
preservedA = row.names(highpres)
preservedA = preservedA[preservedA != "grey"]
prescolorsA = colormappingsA[colormappingsA$modulesA1 %in% preservedA,]
prescolorsA$modulesA1 = as.factor(prescolorsA$modulesA1)
goodmodsA = split(prescolorsA, f= prescolorsA$modulesA1)
saveRDS(goodmodsA, "goodmodsA.rds")
We do the same with the disease datasets.
modules = main(adjB1)
colormappings = modules[[2]]
modules = modules[[1]]
##Healthy Preservation
ranksB = triple(modules, datExprB1g, datExprB2g, datExprB3g)
colormappingsB = colormappings
#A means for all such healthy datasets
##higher Zsummary => more preservation
# Langfelder, P., Luo, R., Oldham, M. C., and Horvath, S. highpres = ranksB[ranksB$Zsummary.pres > 10,]
preservedB = row.names(highpres)
preservedB = preservedB[preservedB != "grey"]
prescolorsB = colormappingsB[colormappingsB$modulesA1 %in% preservedB,]
prescolorsB$modulesA1 = as.factor(prescolorsB$modulesA1)
goodmodsB = split(prescolorsB, f= prescolorsB$modulesA1)
saveRDS(goodmodsB, "goodmodsB.rds")
The two objects, one for each condition, should look something like this:
##preserved modules in asthma condition
str(readRDS("goodmodsB.rds"))
## List of 75
## $ antiquewhite4 :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ bisque4 :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ black :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ blue :'data.frame': 423 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 4 4 4 4 4 4 4 4 4 4 ...
## ..$ value : int [1:423] 11078 7461 9406 8511 6399 11813 11107 12554 7566 10770 ...
## $ brown :'data.frame': 416 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 5 5 5 5 5 5 5 5 5 5 ...
## ..$ value : int [1:416] 6717 3956 2085 6797 8510 2644 10230 7769 7834 9125 ...
## $ brown4 :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ coral1 :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ coral2 :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ cyan :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ darkgreen :'data.frame': 116 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 10 10 10 10 10 10 10 10 10 10 ...
## ..$ value : int [1:116] 6978 3548 85 11792 9178 1786 1357 1641 10615 6955 ...
## $ darkgrey :'data.frame': 113 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 11 11 11 11 11 11 11 11 11 11 ...
## ..$ value : int [1:113] 12655 10843 8293 3475 11296 7293 5723 7768 11234 2132 ...
## $ darkmagenta :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ darkolivegreen :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ darkolivegreen4:'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ darkorange :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ darkorange2 :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ darkred :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ darkseagreen4 :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ darkslateblue :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ darkturquoise :'data.frame': 115 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 20 20 20 20 20 20 20 20 20 20 ...
## ..$ value : int [1:115] 10434 8334 3868 8220 2063 2474 2643 1938 4498 8635 ...
## $ firebrick4 :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ floralwhite :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ green :'data.frame': 263 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 23 23 23 23 23 23 23 23 23 23 ...
## ..$ value : int [1:263] 8581 7304 11721 10510 3722 7103 7460 12793 6878 11773 ...
## $ greenyellow :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ grey :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ grey60 :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ honeydew1 :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ indianred4 :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ ivory :'data.frame': 65 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 29 29 29 29 29 29 29 29 29 29 ...
## ..$ value : int [1:65] 7953 2545 11733 9697 7396 12582 5090 7340 4135 4627 ...
## $ lavenderblush3 :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ lightcoral :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ lightcyan :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ lightcyan1 :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ lightgreen :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ lightpink4 :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ lightsteelblue :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ lightsteelblue1:'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ lightyellow :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ magenta :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ maroon :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ mediumorchid :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ mediumpurple2 :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ mediumpurple3 :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ midnightblue :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ navajowhite2 :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ orange :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ orangered3 :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ orangered4 :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ paleturquoise :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ palevioletred3 :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ pink :'data.frame': 230 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 51 51 51 51 51 51 51 51 51 51 ...
## ..$ value : int [1:230] 9032 5453 141 9931 12662 10759 12682 3090 12204 6634 ...
## $ plum :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ plum1 :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ plum2 :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ purple :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ red :'data.frame': 247 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 56 56 56 56 56 56 56 56 56 56 ...
## ..$ value : int [1:247] 5118 9000 8358 1913 11156 6133 12493 11783 8366 2131 ...
## $ royalblue :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ saddlebrown :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ salmon :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ salmon4 :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ sienna3 :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ skyblue :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ skyblue1 :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ skyblue2 :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ skyblue3 :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ steelblue :'data.frame': 84 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 66 66 66 66 66 66 66 66 66 66 ...
## ..$ value : int [1:84] 4112 544 3661 12728 1780 7548 11087 7880 5830 2080 ...
## $ tan :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ thistle1 :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ thistle2 :'data.frame': 59 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 69 69 69 69 69 69 69 69 69 69 ...
## ..$ value : int [1:59] 11175 9790 4576 12775 7131 104 8217 1315 5660 9958 ...
## $ turquoise :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ violet :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ white :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ yellow :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ yellow4 :'data.frame': 0 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..:
## ..$ value : int(0)
## $ yellowgreen :'data.frame': 76 obs. of 2 variables:
## ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 75 75 75 75 75 75 75 75 75 75 ...
## ..$ value : int [1:76] 10066 10280 5635 800 11136 6188 4990 2511 3 9571 ...
We perform a Fisher’s exact test to determine
h = readRDS("goodmodsA.rds")
d = readRDS("goodmodsB.rds")
# filtering for the nonempty elements of the list.
total_genes = 12806
pval = c()
for(i in h){
if(length(i$value)> 0){
for(j in d){
if(length(j$value)> 0){
# Need to get counts for Fisher's exact test
# We construct a 2x2 matrix, where the top left value is the intersction of the two modules
# The top right and bottom left are the unique values to each module.
# The bottom right is everything that is left
intersection = length(intersect(i$value,j$value))
# orientation of matrix doesn't matter here...
topright = length(i$value) - intersection
botleft = length(j$value) - intersection
# careful not to overcount here
botright = total_genes - topright - botleft - intersection
mat = matrix(c(intersection, topright, botleft, botright), ncol = 2)
pval = c(pval, fisher.test(mat)$p.value)
}
}
}
}
pvals = matrix(pval, ncol = 5)
print(pvals)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.835819399 0.81268081 0.848811056 0.43724438 0.2819019
## [2,] 0.404794741 1.00000000 1.000000000 0.63997817 0.1665422
## [3,] 0.326048539 0.37256880 0.725764901 0.77200730 0.4523426
## [4,] 0.556214160 1.00000000 1.000000000 0.07516327 0.4437093
## [5,] 0.845557460 0.64185674 0.725716747 0.54656215 1.0000000
## [6,] 0.005783557 1.00000000 0.633367788 0.11410221 0.1538313
## [7,] 0.016180134 1.00000000 0.005417743 0.68736915 1.0000000
## [8,] 0.889638432 0.52475722 0.801123387 0.67271211 0.3328436
## [9,] 0.589239917 1.00000000 1.000000000 0.83855231 0.3647595
## [10,] 1.000000000 0.06762158 1.000000000 1.00000000 1.0000000
## [11,] 0.781664193 1.00000000 1.000000000 0.40863361 1.0000000
## [12,] 0.807844240 0.57512566 0.644872768 0.72490255 0.3255527