Setup

Importing necessary packages…

require(WGCNA)
source("WGCNA_accessory_functions.R") ##Found in supplementary files
require(flashClust)

A helper function for data processing. Removes the first column (gene names) and makes it the row names of the resulting, scaled matrix.

process = function(dat){
      # saving gene names
      mapping = as.numeric(dat[, 1])
      # removing gene names from main matrix
      hvals = (dat[, -1])
      # gene names now row names
      rownames(hvals) = mapping
      hvals = as.matrix(hvals)
      hvals = scale(hvals)
      return(hvals)
}

Using ‘process’ function, load in the six matrices of interest

# A is for healthy, B for asthma
# GSE64913
datExprA1 = process(read.csv("healthy.csv"))
datExprB1 = process(read.csv("disease.csv"))
# GSE63142
datExprA2 = process(read.csv("healthy63.csv"))
datExprB2 = process(read.csv("disease63.csv"))
# GSE65204
datExprA3 = process(read.csv("healthy65.csv"))
datExprB3 = process(read.csv("disease65.csv"))

We sample for only the genes that are suitable for analysis based on their expression in all three matrices.

c = goodSamplesGenes(t(datExprA1), verbose = 0)
datExprA1 = datExprA1[c$goodGenes,]
c = goodSamplesGenes(t(datExprA2), verbose = 0)
datExprA2 = datExprA2[c$goodGenes,]
c = goodSamplesGenes(t(datExprA3), verbose = 0)
datExprA3 = datExprA3[c$goodGenes,]
commonProbesA = intersect(intersect (rownames(datExprA1),rownames(datExprA2)), rownames(datExprA3)) 
datExprA1g = datExprA1[commonProbesA,] 
datExprA2g = datExprA2[commonProbesA,] 
datExprA3g = datExprA3[commonProbesA,]

c = NULL
c = goodSamplesGenes(t(datExprB1), verbose = 0)
datExprB1 = datExprB1[c$goodGenes,]
c = goodSamplesGenes(t(datExprB2), verbose = 0)
datExprB2 = datExprB2[c$goodGenes,]
c = goodSamplesGenes(t(datExprA3), verbose = 0)
datExprB3 = datExprB3[c$goodGenes,]
commonProbesA = intersect(intersect (rownames(datExprB1),rownames(datExprB2)), rownames(datExprB3)) 
datExprB1g = datExprB1[commonProbesA,] 
datExprB2g = datExprB2[commonProbesA,] 
datExprB3g = datExprB3[commonProbesA,]
print(dim(datExprA1g))
## [1] 12806    23

Meta-Analysis

require(MetaIntegrator)
data = getGEOData(c( "GSE65204", "GSE64913", "GSE63142"))
## Warning: 62 parsing failures.
##   row     col           expected    actual         file
## 54614 SPOT_ID 1/0/T/F/TRUE/FALSE --Control literal data
## 54615 SPOT_ID 1/0/T/F/TRUE/FALSE --Control literal data
## 54616 SPOT_ID 1/0/T/F/TRUE/FALSE --Control literal data
## 54617 SPOT_ID 1/0/T/F/TRUE/FALSE --Control literal data
## 54618 SPOT_ID 1/0/T/F/TRUE/FALSE --Control literal data
## ..... ....... .................. ......... ............
## See problems(...) for more details.
## Warning: 94 parsing failures.
##   row          col           expected actual         file
## 41001 CONTROL_TYPE 1/0/T/F/TRUE/FALSE    pos literal data
## 41002 CONTROL_TYPE 1/0/T/F/TRUE/FALSE    pos literal data
## 41003 CONTROL_TYPE 1/0/T/F/TRUE/FALSE    pos literal data
## 41004 CONTROL_TYPE 1/0/T/F/TRUE/FALSE    pos literal data
## 41005 CONTROL_TYPE 1/0/T/F/TRUE/FALSE    pos literal data
## ..... ............ .................. ...... ............
## See problems(...) for more details.
##          Length Class  Mode
## GSE65204 1      -none- list
## GSE64913 1      -none- list
## GSE63142 1      -none- list
## [1] "GSE65204_series_matrix.txt.gz"
## [1] "GSE64913_series_matrix.txt.gz"
## [1] "GSE63142_series_matrix.txt.gz"
##Cleaning up a weird dataset
gse65204 = read.csv("gse65204.csv")
classes = gse65204[8,]
classes = classes[, -1]
temp = unlist(classes)
for(i in 1:length(classes)){
    if(grepl("TRUE", temp[i]) == TRUE){
        classes[i] = 1
    }else{
        classes[i] = 0
    }
}
data$originalData$GSE65204$class = unlist(classes)
data$originalData$GSE64913 = classFunction(data$originalData$GSE64913, column = "diagnosis:ch1", diseaseTerms = c("Severe Asthmatic"))##OK....all these datasets are weird
#data$originalData$GSE35571 = classFunction(data$originalData$GSE35571, column = "disease status:ch1", diseaseTerms = c("asthmatic"))
data$originalData$GSE63142 = classFunction(data$originalData$GSE63142, column = "individual:ch1", diseaseTerms = c("severe asthmatic", "not severe asthmatic"))
dataObj = data$originalData$GSE63142
dataObj$pheno$group = as.numeric(dataObj$class)
data$originalData$GSE63142 = NULL
analysis = runMetaAnalysis(data, runLeaveOneOutAnalysis = F)
## Computing effect sizes...
## Computing summary effect sizes...
## Computing Fisher's output...

analysis = filterGenes(analysis, isLeaveOneOut = F, effectSizeThresh = 0, FDRThresh = 0.1)

ROC Plot of our independent cohort

rocPlot(analysis$filterResults$FDR0.1_es0_nStudies1_looaFALSE_hetero0, dataObj)
## Used  589 of  589  pos genes, and  422  of  422  neg genes 
## For dataset GSE63142, AUC = 0.844

Soft Thresholding

Soft thresholding is conducted by iterating through certain exponent values (1-10, 12, 14, … 20). For each exponent, the resulting matrix is compared to a scale-free topology, and the R^2 of correlation is plotted against the exponent. We seek the exponent with the highest R^2 so we can scale our resulting matrix.

cex1 = 0.9;
powers = c(c(1:10), seq(from = 12, to=20, by=2))
sft = pickSoftThreshold(t(datExprA1g), powerVector = powers, verbose = 0)
## Warning: executing %dopar% sequentially: no parallel backend registered
##    Power SFT.R.sq  slope truncated.R.sq mean.k. median.k. max.k.
## 1      1   0.0135 -0.561          0.928  2740.0  2680.000   4250
## 2      2   0.3010 -1.600          0.929   907.0   842.000   2000
## 3      3   0.6010 -1.910          0.922   379.0   324.000   1100
## 4      4   0.7280 -1.890          0.810   186.0   144.000    683
## 5      5   0.8290 -1.800          0.781   103.0    70.500    504
## 6      6   0.7850 -1.940          0.738    63.7    37.000    461
## 7      7   0.7370 -1.860          0.732    43.2    20.500    436
## 8      8   0.7030 -1.720          0.759    31.7    11.900    420
## 9      9   0.6650 -1.560          0.801    24.9     7.200    408
## 10    10   0.6460 -1.440          0.846    20.7     4.450    399
## 11    12   0.6150 -1.240          0.907    16.0     1.870    386
## 12    14   0.6020 -1.130          0.925    13.6     0.839    376
## 13    16   0.6110 -1.060          0.928    12.3     0.403    368
## 14    18   0.6210 -1.030          0.924    11.4     0.206    362
## 15    20   0.6360 -0.991          0.925    10.8     0.109    356
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     xlab="Soft Threshold (power)",ylab="signed R^2",type="n",
     main = paste("GSE64913, Healthy"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     labels=powers,cex=cex1,col="red");
abline(h=0.80,col="red")

sft = pickSoftThreshold(t(datExprA2g), powerVector = powers, verbose = 0)
##    Power SFT.R.sq slope truncated.R.sq  mean.k. median.k. max.k.
## 1      1   0.1410  2.49          0.994 2680.000  2.67e+03 3810.0
## 2      2   0.0149 -0.41          0.991  854.000  8.38e+02 1630.0
## 3      3   0.2380 -1.39          0.985  337.000  3.22e+02  830.0
## 4      4   0.4790 -1.88          0.990  153.000  1.40e+02  466.0
## 5      5   0.6180 -2.13          0.981   76.400  6.70e+01  281.0
## 6      6   0.7520 -2.01          0.991   41.300  3.43e+01  178.0
## 7      7   0.8540 -2.36          0.989   23.800  1.85e+01  144.0
## 8      8   0.9090 -2.46          0.978   14.500  1.05e+01  121.0
## 9      9   0.9460 -2.40          0.974    9.180  6.13e+00  103.0
## 10    10   0.9630 -2.31          0.971    6.070  3.71e+00   89.5
## 11    12   0.9670 -2.08          0.961    2.930  1.47e+00   69.6
## 12    14   0.9550 -1.92          0.950    1.580  6.30e-01   55.7
## 13    16   0.9610 -1.77          0.956    0.936  2.92e-01   45.5
## 14    18   0.9650 -1.66          0.962    0.594  1.42e-01   37.6
## 15    20   0.9580 -1.59          0.960    0.399  7.14e-02   31.5
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     xlab="Soft Threshold (power)",ylab="signed R^2",type="n",
     main = paste("GSE63142, Healthy"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     labels=powers,cex=cex1,col="red");
abline(h=0.9,col="red")

sft = pickSoftThreshold(t(datExprA3g), powerVector = powers, verbose = 0)
##    Power SFT.R.sq  slope truncated.R.sq mean.k. median.k. max.k.
## 1      1   0.0434  0.890          0.975 2820.00  2.80e+03 4310.0
## 2      2   0.1160 -0.792          0.973  951.00  9.09e+02 2060.0
## 3      3   0.4820 -1.330          0.983  398.00  3.58e+02 1170.0
## 4      4   0.7510 -1.650          0.986  192.00  1.59e+02  749.0
## 5      5   0.8730 -1.780          0.988  103.00  7.70e+01  525.0
## 6      6   0.9300 -1.810          0.990   59.80  3.97e+01  392.0
## 7      7   0.9550 -1.810          0.989   37.10  2.15e+01  308.0
## 8      8   0.9620 -1.790          0.988   24.30  1.21e+01  253.0
## 9      9   0.9620 -1.760          0.983   16.60  7.11e+00  213.0
## 10    10   0.9580 -1.720          0.980   11.80  4.29e+00  183.0
## 11    12   0.9490 -1.640          0.975    6.57  1.69e+00  140.0
## 12    14   0.9050 -1.610          0.951    4.00  7.21e-01  111.0
## 13    16   0.9160 -1.550          0.964    2.62  3.28e-01   90.8
## 14    18   0.9250 -1.500          0.976    1.80  1.57e-01   75.5
## 15    20   0.9300 -1.470          0.984    1.29  7.88e-02   63.6
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     xlab="Soft Threshold (power)",ylab="signed R^2",type="n",
     main = paste("GSE65204, Healthy"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     labels=powers,cex=cex1,col="red");
abline(h=0.92,col="red")

sft = pickSoftThreshold(t(datExprB1g), powerVector = powers, verbose = 0)
##    Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1      1  0.00226  0.34          0.916  3280.0   3240.00   4360
## 2      2  0.14800 -1.81          0.806  1270.0   1230.00   2200
## 3      3  0.38900 -1.61          0.640   610.0    568.00   1320
## 4      4  0.60300 -2.05          0.565   335.0    299.00    919
## 5      5  0.71500 -2.33          0.633   203.0    171.00    740
## 6      6  0.73300 -2.37          0.684   134.0    104.00    638
## 7      7  0.72500 -2.28          0.738    93.7     67.00    577
## 8      8  0.69900 -2.14          0.761    69.6     44.90    537
## 9      9  0.67800 -1.98          0.790    54.2     30.80    510
## 10    10  0.66200 -1.84          0.800    44.0     21.80    491
## 11    12  0.63500 -1.63          0.807    32.0     11.60    466
## 12    14  0.64200 -1.50          0.798    25.7      6.67    450
## 13    16  0.63500 -1.38          0.808    22.0      4.01    439
## 14    18  0.64000 -1.33          0.793    19.6      2.54    431
## 15    20  0.64500 -1.26          0.801    18.1      1.66    424
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     xlab="Soft Threshold (power)",ylab="signed R^2",type="n",
     main = paste("GSE64913, Asthma"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     labels=powers,cex=cex1,col="red");
abline(h=0.72,col="red")

sft = pickSoftThreshold(t(datExprB2g), powerVector = powers, verbose = 0)
##    Power SFT.R.sq  slope truncated.R.sq  mean.k. median.k.  max.k.
## 1      1   0.2240  2.210          0.972 2400.000  2.38e+03 3640.00
## 2      2   0.0233 -0.364          0.973  699.000  6.70e+02 1500.00
## 3      3   0.3440 -1.260          0.982  255.000  2.32e+02  730.00
## 4      4   0.6080 -1.730          0.987  107.000  9.22e+01  398.00
## 5      5   0.7140 -2.050          0.983   50.400  4.03e+01  233.00
## 6      6   0.7740 -2.140          0.987   25.600  1.90e+01  144.00
## 7      7   0.8130 -2.150          0.992   13.900  9.44e+00   92.00
## 8      8   0.8380 -2.120          0.990    7.990  4.91e+00   60.70
## 9      9   0.8590 -2.060          0.994    4.800  2.68e+00   41.30
## 10    10   0.8740 -2.000          0.991    3.000  1.50e+00   29.00
## 11    12   0.8420 -2.110          0.976    1.300  5.17e-01   17.10
## 12    14   0.8460 -2.190          0.986    0.627  1.93e-01   11.80
## 13    16   0.8840 -2.100          0.995    0.331  7.69e-02    8.44
## 14    18   0.9010 -1.990          0.987    0.189  3.19e-02    6.22
## 15    20   0.9300 -1.860          0.984    0.114  1.39e-02    4.69
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     xlab="Soft Threshold (power)",ylab="signed R^2",type="n",
     main = paste("GSE63142, Asthma"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     labels=powers,cex=cex1,col="red");
abline(h=0.83,col="red")

sft = pickSoftThreshold(t(datExprB3g), powerVector = powers, verbose = 0)
##    Power SFT.R.sq  slope truncated.R.sq  mean.k. median.k. max.k.
## 1      1   0.2630  2.330          0.956 2720.000  2.72e+03 3930.0
## 2      2   0.0213 -0.309          0.945  887.000  8.63e+02 1790.0
## 3      3   0.4100 -1.280          0.943  362.000  3.32e+02 1020.0
## 4      4   0.6800 -1.670          0.958  171.000  1.44e+02  652.0
## 5      5   0.8180 -1.850          0.966   89.900  6.81e+01  456.0
## 6      6   0.8810 -1.910          0.968   51.300  3.43e+01  338.0
## 7      7   0.9170 -1.900          0.973   31.300  1.82e+01  262.0
## 8      8   0.9420 -1.850          0.982   20.200  1.01e+01  209.0
## 9      9   0.9530 -1.800          0.984   13.600  5.83e+00  172.0
## 10    10   0.9570 -1.750          0.986    9.530  3.47e+00  144.0
## 11    12   0.9670 -1.640          0.993    5.130  1.33e+00  104.0
## 12    14   0.9700 -1.560          0.996    3.030  5.54e-01   79.3
## 13    16   0.9530 -1.530          0.983    1.920  2.44e-01   62.6
## 14    18   0.9550 -1.500          0.997    1.280  1.13e-01   50.4
## 15    20   0.9550 -1.470          0.998    0.892  5.46e-02   41.3
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     xlab="Soft Threshold (power)",ylab="signed R^2",type="n",
     main = paste("GSE65204, Asthma"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     labels=powers,cex=cex1,col="red");
abline(h=0.90,col="red")

We use each of the optimal exponents to create our weighted adjacency matrices for clustering.

Clustering

We first compute the adjacency matrices, with the powers we got from the previous analysis.

adjA1 = adjacency(t(datExprA1g), type = "signed", power = 5)
diag(adjA1)=0
adjA2 = adjacency(t(datExprA2g), type = "signed", power = 8)
diag(adjA2)=0
adjA3 = adjacency(t(datExprA3g), type = "signed", power = 6)
diag(adjA3)=0
adjB1 = adjacency(t(datExprB1g), type = "signed", power = 5)
diag(adjB1)=0
adjB2 = adjacency(t(datExprB2g), type = "signed", power = 7)
diag(adjB2)=0
adjB3 = adjacency(t(datExprB3g), type = "signed", power = 7)
diag(adjB3)=0
print(dim(adjA1))
## [1] 12806 12806

We define a function main that performs the clustering analysis from an adjacency matrix, and returns the clusters as well as the mappings between gene IDs and their respective cluster.

This function calculates the dissimilarity matrix of each of the adjusted adjacency matrices, which is computed by subtracting the Topological Overlap Matrix from 1. The resulting matrix is clustered by distance, which produces a dendrogram consisting of all the genes in the matrix.

To choose the number of clusters, we use a cut tree technique, that takes clusters that stem from a fixed value. The unique branches at that cutoff each make up one cluster, consisting of all the genes that appear below it.

main = function(adj){
      dissTOMA1 = 1-TOMsimilarity(adj, TOMType="signed")
      geneTree = flashClust(as.dist(dissTOMA1), method="average")
      mColorh=NULL
      for (ds in 0:3){
            tree = cutreeHybrid(dendro = geneTree, pamStage=FALSE,
                                minClusterSize = (30-3*ds), cutHeight = 0.99,
                                deepSplit = ds, distM = dissTOMA1)
            mColorh=cbind(mColorh,labels2colors(tree$labels));
      }
      modulesA1 = mColorh[,3]
      require(reshape2)
      ordergenes = geneTree$order
      df = melt(data.frame(ordergenes, modulesA1))
      df = df[, c(1, 3)]
      return(list(modulesA1, df))
}

Module Preservation

We define a function triple that determines module preservation statistics across a group of three modules.

Module preservation statistics are calculated for the identified modules using permutation test. Comparison of modules are made between all datasets, and permutation testing results return the Zsummary scores for every module.

triple = function(modulesA1, datExprA1g, datExprA2g, datExprA3g){
      multiExpr = list(A1=list(data=t(datExprA1g)),A2=list(data=t(datExprA2g)), A3=list(data=t(datExprA3g)))
      multiColor = list(A1 = modulesA1)
      mp=modulePreservation(multiExpr,multiColor,referenceNetworks=1,verbose=3,networkType="signed",
                            nPermutations=50,maxGoldModuleSize=100,maxModuleSize=400)
      stats = mp$preservation$Z$ref.A1$inColumnsAlsoPresentIn.A2
      ranks = mp$preservation$observed$ref.A1$inColumnsAlsoPresentIn.A2
      ranking = stats[order(-stats[,2]),c(1:2)]
      return(ranking)
}

Applying Clustering and Preservation Function across Datasets

Here we take the adjacency matrices we computed and apply our previously defined functions.

We use the first dataset as our root dataset, but this doesn’t really matter since we are interested in the genes that stay preserved across all three sets. The order with which we arrive at these genes is not important.

This is the module preservation analysis of the three healthy matrices. We take all the modules that have Zsummary values greater than 10, as described in the Methods section.

This section of code returns the modules that show high preservation. It is returned in a list, where non-null entries contain a dataframe with the module color as well as the gene IDs corresponding to that module.

We provide the results from this analysis as a saved R file so that a user may forgo the timely permutation tests and other analyses. The module preservation takes some time.

modules = main(adjA1)
colormappings = modules[[2]]
modules = modules[[1]]
##Healthy Preservation
ranksA = triple(modules, datExprA1g, datExprA2g, datExprA3g)
colormappingsA = main(adjA1)[[2]]
#A means for all such healthy datasets
##higher Zsummary => more preservation
# Langfelder, P., Luo, R., Oldham, M. C., and Horvath, S. 
highpres = ranksA[ranksA$Zsummary.pres > 10,]
preservedA = row.names(highpres)
preservedA = preservedA[preservedA != "grey"]
prescolorsA = colormappingsA[colormappingsA$modulesA1 %in% preservedA,]
prescolorsA$modulesA1 = as.factor(prescolorsA$modulesA1)
goodmodsA = split(prescolorsA, f= prescolorsA$modulesA1)
saveRDS(goodmodsA, "goodmodsA.rds")

We do the same with the disease datasets.

modules = main(adjB1)
colormappings = modules[[2]]
modules = modules[[1]]
##Healthy Preservation
ranksB = triple(modules, datExprB1g, datExprB2g, datExprB3g)
colormappingsB = colormappings
#A means for all such healthy datasets
##higher Zsummary => more preservation
# Langfelder, P., Luo, R., Oldham, M. C., and Horvath, S. highpres = ranksB[ranksB$Zsummary.pres > 10,]
preservedB = row.names(highpres)
preservedB = preservedB[preservedB != "grey"]
prescolorsB = colormappingsB[colormappingsB$modulesA1 %in% preservedB,]


prescolorsB$modulesA1 = as.factor(prescolorsB$modulesA1)
goodmodsB = split(prescolorsB, f= prescolorsB$modulesA1)
saveRDS(goodmodsB, "goodmodsB.rds")

The two objects, one for each condition, should look something like this:

##preserved modules in asthma condition
str(readRDS("goodmodsB.rds"))
## List of 75
##  $ antiquewhite4  :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ bisque4        :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ black          :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ blue           :'data.frame': 423 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 4 4 4 4 4 4 4 4 4 4 ...
##   ..$ value    : int [1:423] 11078 7461 9406 8511 6399 11813 11107 12554 7566 10770 ...
##  $ brown          :'data.frame': 416 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 5 5 5 5 5 5 5 5 5 5 ...
##   ..$ value    : int [1:416] 6717 3956 2085 6797 8510 2644 10230 7769 7834 9125 ...
##  $ brown4         :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ coral1         :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ coral2         :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ cyan           :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ darkgreen      :'data.frame': 116 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ value    : int [1:116] 6978 3548 85 11792 9178 1786 1357 1641 10615 6955 ...
##  $ darkgrey       :'data.frame': 113 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 11 11 11 11 11 11 11 11 11 11 ...
##   ..$ value    : int [1:113] 12655 10843 8293 3475 11296 7293 5723 7768 11234 2132 ...
##  $ darkmagenta    :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ darkolivegreen :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ darkolivegreen4:'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ darkorange     :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ darkorange2    :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ darkred        :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ darkseagreen4  :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ darkslateblue  :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ darkturquoise  :'data.frame': 115 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 20 20 20 20 20 20 20 20 20 20 ...
##   ..$ value    : int [1:115] 10434 8334 3868 8220 2063 2474 2643 1938 4498 8635 ...
##  $ firebrick4     :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ floralwhite    :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ green          :'data.frame': 263 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 23 23 23 23 23 23 23 23 23 23 ...
##   ..$ value    : int [1:263] 8581 7304 11721 10510 3722 7103 7460 12793 6878 11773 ...
##  $ greenyellow    :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ grey           :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ grey60         :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ honeydew1      :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ indianred4     :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ ivory          :'data.frame': 65 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 29 29 29 29 29 29 29 29 29 29 ...
##   ..$ value    : int [1:65] 7953 2545 11733 9697 7396 12582 5090 7340 4135 4627 ...
##  $ lavenderblush3 :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ lightcoral     :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ lightcyan      :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ lightcyan1     :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ lightgreen     :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ lightpink4     :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ lightsteelblue :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ lightsteelblue1:'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ lightyellow    :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ magenta        :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ maroon         :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ mediumorchid   :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ mediumpurple2  :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ mediumpurple3  :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ midnightblue   :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ navajowhite2   :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ orange         :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ orangered3     :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ orangered4     :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ paleturquoise  :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ palevioletred3 :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ pink           :'data.frame': 230 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 51 51 51 51 51 51 51 51 51 51 ...
##   ..$ value    : int [1:230] 9032 5453 141 9931 12662 10759 12682 3090 12204 6634 ...
##  $ plum           :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ plum1          :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ plum2          :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ purple         :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ red            :'data.frame': 247 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 56 56 56 56 56 56 56 56 56 56 ...
##   ..$ value    : int [1:247] 5118 9000 8358 1913 11156 6133 12493 11783 8366 2131 ...
##  $ royalblue      :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ saddlebrown    :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ salmon         :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ salmon4        :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ sienna3        :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ skyblue        :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ skyblue1       :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ skyblue2       :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ skyblue3       :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ steelblue      :'data.frame': 84 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 66 66 66 66 66 66 66 66 66 66 ...
##   ..$ value    : int [1:84] 4112 544 3661 12728 1780 7548 11087 7880 5830 2080 ...
##  $ tan            :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ thistle1       :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ thistle2       :'data.frame': 59 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 69 69 69 69 69 69 69 69 69 69 ...
##   ..$ value    : int [1:59] 11175 9790 4576 12775 7131 104 8217 1315 5660 9958 ...
##  $ turquoise      :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ violet         :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ white          :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ yellow         :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ yellow4        :'data.frame': 0 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 
##   ..$ value    : int(0) 
##  $ yellowgreen    :'data.frame': 76 obs. of  2 variables:
##   ..$ modulesA1: Factor w/ 75 levels "antiquewhite4",..: 75 75 75 75 75 75 75 75 75 75 ...
##   ..$ value    : int [1:76] 10066 10280 5635 800 11136 6188 4990 2511 3 9571 ...

We perform a Fisher’s exact test to determine

h = readRDS("goodmodsA.rds")
d = readRDS("goodmodsB.rds")

# filtering for the nonempty elements of the list. 
total_genes = 12806
pval = c()
for(i in h){
    if(length(i$value)> 0){
        for(j in d){
            if(length(j$value)> 0){
                # Need to get counts for Fisher's exact test
                # We construct a 2x2 matrix, where the top left value is the intersction of the two modules
                # The top right and bottom left are the unique values to each module. 
                # The bottom right is everything that is left
                intersection = length(intersect(i$value,j$value))
                # orientation of matrix doesn't matter here...
                topright = length(i$value) - intersection
                botleft = length(j$value) - intersection
                # careful not to overcount here
                botright = total_genes - topright - botleft - intersection
                mat = matrix(c(intersection, topright, botleft, botright), ncol = 2)
                pval = c(pval, fisher.test(mat)$p.value)
            }
        }
    }
}

pvals = matrix(pval, ncol = 5)
print(pvals)
##              [,1]       [,2]        [,3]       [,4]      [,5]
##  [1,] 0.835819399 0.81268081 0.848811056 0.43724438 0.2819019
##  [2,] 0.404794741 1.00000000 1.000000000 0.63997817 0.1665422
##  [3,] 0.326048539 0.37256880 0.725764901 0.77200730 0.4523426
##  [4,] 0.556214160 1.00000000 1.000000000 0.07516327 0.4437093
##  [5,] 0.845557460 0.64185674 0.725716747 0.54656215 1.0000000
##  [6,] 0.005783557 1.00000000 0.633367788 0.11410221 0.1538313
##  [7,] 0.016180134 1.00000000 0.005417743 0.68736915 1.0000000
##  [8,] 0.889638432 0.52475722 0.801123387 0.67271211 0.3328436
##  [9,] 0.589239917 1.00000000 1.000000000 0.83855231 0.3647595
## [10,] 1.000000000 0.06762158 1.000000000 1.00000000 1.0000000
## [11,] 0.781664193 1.00000000 1.000000000 0.40863361 1.0000000
## [12,] 0.807844240 0.57512566 0.644872768 0.72490255 0.3255527