library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(WGCNA)
## Loading required package: dynamicTreeCut
## Loading required package: fastcluster
##
## Attaching package: 'fastcluster'
##
## The following object is masked from 'package:stats':
##
## hclust
##
##
##
## Attaching package: 'WGCNA'
##
## The following object is masked from 'package:stats':
##
## cor
options(stringsAsFactors = FALSE)
HTGdata <- read.csv("/Users/lorandacalderonzamora/GSE45516/CEL/datExprHTG.csv")
dim(HTGdata)
## [1] 23663 10
names(HTGdata)
## [1] "Gene_ID" "GSM1106065" "GSM1106066" "GSM1106067" "GSM1106068"
## [6] "GSM1106069" "GSM1106070" "GSM1106071" "GSM1106072" "GSM1106073"
rownames(HTGdata) <- HTGdata$X
HTGdata<- HTGdata[, -1]
datExpr <- as.data.frame(t(HTGdata))
dim(datExpr)
## [1] 9 23663
gsg = goodSamplesGenes(datExpr, verbose = 3);
## Flagging genes and samples with too many missing values...
## ..step 1
gsg$allOK
## [1] TRUE
sampleTree = hclust(dist(datExpr), method = "average");
sizeGrWindow(12,9)
par(cex = 0.6);
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,
cex.axis = 1.5, cex.main = 2)
traitData <- read.csv("/Users/lorandacalderonzamora/GSE45516/CEL/datMeta.csv")
dim(traitData )
## [1] 9 4
names(traitData )
## [1] "geo_accession" "Disease" "Age" "Gender"
sampleNames <- trimws(rownames(datExpr))
traitData$geo_accession <- trimws(traitData$geo_accession)
sampleNames <- rownames(datExpr)
head(traitData$geo_accession)
## [1] "GSM1106065" "GSM1106066" "GSM1106067" "GSM1106068" "GSM1106069"
## [6] "GSM1106070"
all(sampleNames %in% traitData$geo_accession)
## [1] TRUE
traitRows <- match(sampleNames, traitData$geo_accession)
datTraits <- traitData[traitRows, ]
rownames(datTraits) <- datTraits$geo_accession
datTraits[] <- lapply(datTraits, function(x) {
if (is.character(x) || is.factor(x)) {
as.numeric(as.factor(x))
} else {
x
}
})
datTraits_clean <- datTraits[, sapply(datTraits, is.numeric)]
str(datTraits_clean)
## 'data.frame': 9 obs. of 4 variables:
## $ geo_accession: num 1 2 3 4 5 6 7 8 9
## $ Disease : num 1 1 1 2 2 2 2 2 2
## $ Age : int 28 48 57 38 63 57 47 38 37
## $ Gender : num 1 1 1 1 1 1 1 1 1
sampleTree2 <- hclust(dist(datExpr), method = "average")
traitColors <- numbers2colors(datTraits_clean, signed = FALSE)
## Warning in numbers2colors(datTraits_clean, signed = FALSE): (some columns in)
## 'x' are constant. Their color will be the color of NA.
plotDendroAndColors(sampleTree2, traitColors,
groupLabels = names(datTraits_clean),
main = "Sample dendrogram and trait heatmap")

save(datExpr, datTraits_clean, file = "HTGdataInput.RData")
lnames = load(file = "HTGdataInput.RData");
#The variable lnames contains the names of loaded variables.
lnames
## [1] "datExpr" "datTraits_clean"
# Filter the least variable genes (optional)
variance_genes <- apply(datExpr, 2, var)
datExpr_filtered <- datExpr[, variance_genes > quantile(variance_genes, 0.75)]
# Select a narrower range of powers due to few samples
powers = c(1:6)
sft = pickSoftThreshold(datExpr_filtered, powerVector = powers, verbose = 5)
## pickSoftThreshold: will use block size 5916.
## pickSoftThreshold: calculating connectivity for given powers...
## ..working on genes 1 through 5916 of 5916
## Warning: executing %dopar% sequentially: no parallel backend registered
## Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1 1 0.614 1.700 0.684 2490 2550 3460
## 2 2 0.180 0.334 0.162 1450 1410 2530
## 3 3 0.278 -0.227 0.102 968 869 2020
## 4 4 0.808 -0.494 0.754 702 574 1690
## 5 5 0.906 -0.645 0.883 536 399 1450
## 6 6 0.916 -0.737 0.896 424 286 1280
sizeGrWindow(9, 5)
par(mfrow = c(1, 2))
cex1 = 0.9
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3]) * sft$fitIndices[,2],
xlab = "Soft Threshold (power)",
ylab = "Scale Free Topology Model Fit, signed R^2", type = "n",
main = "Scale independence")
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3]) * sft$fitIndices[,2],
labels = powers, cex = cex1, col = "red")
abline(h = 0.80, col = "red") # Ajuste del umbral a 0.80
plot(sft$fitIndices[,1], sft$fitIndices[,5],
xlab = "Soft Threshold (power)",
ylab = "Mean Connectivity", type = "n",
main = "Mean connectivity")
text(sft$fitIndices[,1], sft$fitIndices[,5], labels = powers, cex = cex1, col = "red")
net = blockwiseModules(
datExpr,
power = 6,
TOMType = "unsigned",
minModuleSize = 200,
reassignThreshold = 0,
mergeCutHeight = 0.20,
numericLabels = TRUE,
pamRespectsDendro = FALSE,
saveTOMs = TRUE,
saveTOMFileBase = "TOM_HTG",
verbose = 3
)
## Calculating module eigengenes block-wise from all genes
## Flagging genes and samples with too many missing values...
## ..step 1
## ....pre-clustering genes to determine blocks..
## Projective K-means:
## ..k-means clustering..
## ..merging smaller clusters...
## Block sizes:
## gBlocks
## 1 2 3 4 5
## 4955 4954 4895 4484 4375
## ..Working on block 1 .
## TOM calculation: adjacency..
## ..will not use multithreading.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
## ..saving TOM for block 1 into file TOM_HTG-block.1.RData
## ....clustering..
## ....detecting modules..
## ....calculating module eigengenes..
## ....checking kME in modules..
## ..removing 51 genes from module 1 because their KME is too low.
## ..removing 26 genes from module 2 because their KME is too low.
## ..removing 1 genes from module 3 because their KME is too low.
## ..removing 5 genes from module 4 because their KME is too low.
## ..removing 3 genes from module 5 because their KME is too low.
## ..removing 2 genes from module 6 because their KME is too low.
## ..Working on block 2 .
## TOM calculation: adjacency..
## ..will not use multithreading.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
## ..saving TOM for block 2 into file TOM_HTG-block.2.RData
## ....clustering..
## ....detecting modules..
## ....calculating module eigengenes..
## ....checking kME in modules..
## ..Working on block 3 .
## TOM calculation: adjacency..
## ..will not use multithreading.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
## ..saving TOM for block 3 into file TOM_HTG-block.3.RData
## ....clustering..
## ....detecting modules..
## ....calculating module eigengenes..
## ....checking kME in modules..
## ..Working on block 4 .
## TOM calculation: adjacency..
## ..will not use multithreading.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
## ..saving TOM for block 4 into file TOM_HTG-block.4.RData
## ....clustering..
## ....detecting modules..
## ....calculating module eigengenes..
## ....checking kME in modules..
## ..removing 27 genes from module 1 because their KME is too low.
## ..removing 9 genes from module 2 because their KME is too low.
## ..removing 7 genes from module 3 because their KME is too low.
## ..removing 1 genes from module 4 because their KME is too low.
## ..removing 1 genes from module 6 because their KME is too low.
## ..removing 2 genes from module 7 because their KME is too low.
## ..removing 1 genes from module 8 because their KME is too low.
## ..removing 1 genes from module 10 because their KME is too low.
## ..Working on block 5 .
## TOM calculation: adjacency..
## ..will not use multithreading.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
## ..saving TOM for block 5 into file TOM_HTG-block.5.RData
## ....clustering..
## ....detecting modules..
## ....calculating module eigengenes..
## ....checking kME in modules..
## ..removing 7 genes from module 1 because their KME is too low.
## ..removing 1 genes from module 2 because their KME is too low.
## ..removing 6 genes from module 3 because their KME is too low.
## ..removing 4 genes from module 7 because their KME is too low.
## ..removing 7 genes from module 8 because their KME is too low.
## ..removing 1 genes from module 9 because their KME is too low.
## ..merging modules that are too close..
## mergeCloseModules: Merging modules whose distance is less than 0.2
## Calculating new MEs...
sizeGrWindow(12, 9)
mergedColors = labels2colors(net$colors)
for (i in 1:length(net$dendrograms)) {
plotDendroAndColors(
net$dendrograms[[i]],
mergedColors[net$blockGenes[[i]]],
"Module colors",
dendroLabels = FALSE,
hang = 0.03,
addGuide = TRUE, guideHang = 0.05
)
}
table(net$colors) # Conteo del número de genes por módulo
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## 163 7108 3309 2540 879 657 656 641 559 497 486 435 418 409 391 386
## 16 17 18 19 20 21 22 23 24 25 26 27 28 29
## 378 377 372 358 337 333 287 283 276 254 238 220 214 202
MEs = net$MEs # # Module Eigengene
moduleTraitCor = cor(MEs, datTraits_clean, use = "pairwise.complete.obs")
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nrow(datExpr))
print(moduleTraitCor)
## geo_accession Disease Age Gender
## ME8 -0.4382755148 -0.36457149 -0.104383861 NA
## ME9 -0.2352229168 -0.35510138 0.023690670 NA
## ME17 -0.5689647285 -0.37809909 -0.085707826 NA
## ME16 -0.4514915787 -0.09074511 0.543004884 NA
## ME20 -0.4970108215 -0.34308831 0.617882022 NA
## ME23 -0.8230491909 -0.65341026 -0.296925655 NA
## ME4 -0.5925959756 -0.90279162 0.006531993 NA
## ME6 -0.6305260798 -0.76633530 0.271146142 NA
## ME1 -0.8489723840 -0.82311878 0.280952178 NA
## ME5 -0.7865146094 -0.78344549 0.160073791 NA
## ME15 -0.1956299774 0.13870565 -0.208432057 NA
## ME11 -0.2483511781 0.21374683 0.301846403 NA
## ME25 -0.3241506570 0.04194377 0.408948816 NA
## ME28 -0.0612228444 -0.10532073 0.690944927 NA
## ME14 -0.5613228524 -0.37390689 0.314084857 NA
## ME2 -0.5543003743 -0.59802441 0.090798838 NA
## ME13 -0.4054150893 -0.20857445 0.398355252 NA
## ME21 0.1328138014 0.12629798 0.264375542 NA
## ME10 -0.2560984761 -0.34864712 0.571461193 NA
## ME27 -0.6499293996 -0.38002505 0.449144117 NA
## ME18 0.1866279473 -0.19554363 0.165176368 NA
## ME19 -0.6866338701 -0.72554878 -0.370975140 NA
## ME24 -0.1469863083 -0.50120779 -0.341675325 NA
## ME26 -0.3313068214 -0.54694617 -0.236598033 NA
## ME7 0.0001186404 -0.30538707 -0.522576307 NA
## ME12 -0.1172239434 -0.54019252 -0.281214220 NA
## ME22 -0.6211964781 -0.77746667 -0.490912573 NA
## ME3 0.7326304467 0.92741661 -0.040203096 NA
## ME29 0.1857184646 0.09076155 -0.231270352 NA
## ME0 -0.1400723272 0.02347029 0.061031718 NA
print(moduleTraitPvalue)
## geo_accession Disease Age Gender
## ME8 0.238005950 0.3347214098 0.78927234 NA
## ME9 0.542356011 0.3483617023 0.95176033 NA
## ME17 0.109863685 0.3157041563 0.82646276 NA
## ME16 0.222493890 0.8163994593 0.13084682 NA
## ME20 0.173451876 0.3660455168 0.07618251 NA
## ME23 0.006424560 0.0563291706 0.43780534 NA
## ME4 0.092651349 0.0008564954 0.98669361 NA
## ME6 0.068684953 0.0160177789 0.48035809 NA
## ME1 0.003790369 0.0064161850 0.46397266 NA
## ME5 0.011928080 0.0124986811 0.68078352 NA
## ME15 0.613958266 0.7219223841 0.59045916 NA
## ME11 0.519351148 0.5807981432 0.42987846 NA
## ME25 0.394767807 0.9146780205 0.27444163 NA
## ME28 0.875666762 0.7874157930 0.03930337 NA
## ME14 0.115813221 0.3215382858 0.41044472 NA
## ME2 0.121447371 0.0889477338 0.81629225 NA
## ME13 0.279017166 0.5901995794 0.28827619 NA
## ME21 0.733379651 0.7461034497 0.49181004 NA
## ME10 0.505960268 0.3578102041 0.10796092 NA
## ME27 0.058109517 0.3130418557 0.22520761 NA
## ME18 0.630667594 0.6141178192 0.67106239 NA
## ME19 0.041062079 0.0269294104 0.32565004 NA
## ME24 0.705900799 0.1692744734 0.36815314 NA
## ME26 0.383794391 0.1275197659 0.53992830 NA
## ME7 0.999758308 0.4242146378 0.14890875 NA
## ME12 0.763911087 0.1332519831 0.46353811 NA
## ME22 0.074170193 0.0136622913 0.17962553 NA
## ME3 0.024762720 0.0003158023 0.91820916 NA
## ME29 0.632363980 0.8163666686 0.54935720 NA
## ME0 0.719271484 0.9522086510 0.87605250 NA
# Extract the correlations and p-values for the trait "Disease"
cor_Disease = moduleTraitCor[, "Disease"]
pval_Disease = moduleTraitPvalue[, "Disease"]
best_module = which.max(abs(cor_Disease))
print(paste("Most significant module:", names(cor_Disease)[best_module]))
## [1] "Most significant module: ME3"
print(paste("Correlation:", cor_Disease[best_module]))
## [1] "Correlation: 0.927416610406747"
print(paste("P-value:", pval_Disease[best_module]))
## [1] "P-value: 0.000315802340459394"