Cross-platform integration and Differential Expression analysis of T2DM and HTN transcriptomic profiles

library(GEOquery)

## Loading required package: Biobase

## Loading required package: BiocGenerics

## 
## Attaching package: 'BiocGenerics'

## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs

## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min

## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.

## Setting options('download.file.method.GEOquery'='auto')

## Setting options('GEOquery.inmemory.gpl'=FALSE)

library(affy)
library(limma)

## 
## Attaching package: 'limma'

## The following object is masked from 'package:BiocGenerics':
## 
##     plotMA

library(sva)

## Loading required package: mgcv

## Loading required package: nlme

## This is mgcv 1.9-1. For overview type 'help("mgcv-package")'.

## Loading required package: genefilter

## Loading required package: BiocParallel

library(WGCNA)

## Loading required package: dynamicTreeCut

## Loading required package: fastcluster

## 
## Attaching package: 'fastcluster'

## The following object is masked from 'package:stats':
## 
##     hclust

##

## 
## Attaching package: 'WGCNA'

## The following object is masked from 'package:stats':
## 
##     cor

library(ensembldb)

## Loading required package: GenomicRanges

## Loading required package: stats4

## Loading required package: S4Vectors

## 
## Attaching package: 'S4Vectors'

## The following object is masked from 'package:utils':
## 
##     findMatches

## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname

## Loading required package: IRanges

## 
## Attaching package: 'IRanges'

## The following object is masked from 'package:nlme':
## 
##     collapse

## Loading required package: GenomeInfoDb

## Loading required package: GenomicFeatures

## Loading required package: AnnotationDbi

## Loading required package: AnnotationFilter

## 
## Attaching package: 'ensembldb'

## The following object is masked from 'package:stats':
## 
##     filter

library(biomaRt)

# Load a integrative_matriz_HTN csv file
data_T2DM <- read.csv("/Users/lorandacalderonzamora/Downloads/datExpr_integrative_T2DM.csv", row.names = 1)

# Load a integrative_matriz_HTN_2 csv file
data_HTN <- read.csv("/Users/lorandacalderonzamora/Downloads/datExpr_integrative_HTN.csv", row.names = 1)

# Merging CRdata_HTN and HTN_2 Expression matrices by common genes
common_genes <- intersect(rownames(data_T2DM), rownames(data_HTN))
data_T2DM_common <- data_T2DM[common_genes, ]
data_HTN_common <- data_HTN[common_genes, ]

unificated_expr_matrix <- cbind(data_T2DM_common, data_HTN_common)
unificated_expr_matrix <- as.data.frame(unificated_expr_matrix)

# Relabeling sample identifiers with group labels for Differential Expression analysis
sample_ids <- colnames(unificated_expr_matrix)

group_labels <- sample_ids
group_labels[group_labels %in% c("GSM631755", "GSM631756", "GSM631757", "GSM631758", "GSM631759", "GSM631760", "GSM631761", "GSM524151", "GSM524152", "GSM524153","GSM524154", "GSM524155", "GSM524156", "GSM524157", "GSM524158", "GSM524159", "GSM524160", "GSM701161", "GSM701162", "GSM701163", "GSM701164", "GSM701165", "GSM700797", "GSM700798", "GSM609528", "GSM609529", "GSM609530")] <- "CTL"

group_labels[group_labels %in% c("GSM631762", "GSM631763", "GSM631764", "GSM631765", "GSM631766", "GSM631767", "GSM524161", "GSM524162", "GSM524163", "GSM524164", "GSM524165", "GSM524166", "GSM524167", "GSM524168", "GSM524169", "GSM524170", "GSM701166", "GSM701167", "GSM701168", "GSM701169", "GSM701170", "GSM701171", "GSM701172", "GSM701173", "GSM701174", "GSM700799", "GSM700800", "GSM700801", "GSM700802", "GSM700803", "GSM609526", "GSM609527")] <- "Disease"

colnames(unificated_expr_matrix) <- group_labels

# Boxplot of the merged Expression matrix from T2DM and HTN prior to Batch correction
boxplot(unificated_expr_matrix, main = "Gene Expression distribution before Batch correction", col = c("red", "green")[as.numeric(factor(group_labels))], las = 2, ylab = "Normalized Expression (log2 intensity)")
legend("topright", legend = levels(factor(group_labels)), fill = c("red", "green"))

# Quantile normalization of merged Expression matrix
library(preprocessCore)
expr_matrix_qn <- normalize.quantiles(as.matrix(unificated_expr_matrix))
rownames(expr_matrix_qn) <- rownames(unificated_expr_matrix)
colnames(expr_matrix_qn) <- colnames(unificated_expr_matrix)

# Batch effect correction between T2DM and HTN Datasets using ComBat
n_T2DM <- ncol(data_T2DM_common)
n_HTN <- ncol(data_HTN_common)
batch <- c(rep("T2DM", n_T2DM), rep("HTN", n_HTN))

combat_expr <- ComBat(dat = expr_matrix_qn, batch = batch, par.prior = TRUE, prior.plots = FALSE)

## Found2batches

## Adjusting for0covariate(s) or covariate level(s)

## Standardizing Data across genes

## Fitting L/S model and finding priors

## Finding parametric adjustments

## Adjusting the Data

# Boxplot of the merged Expression matrix from T2DM and HTN after to Batch correction
boxplot(expr_matrix_qn, main = "Gene Expression distribution unificated matriz T2DM and HTN after Batch correction", col = c("red", "green")[as.numeric(factor(group_labels))], las = 2, ylab = "Normalized Expression (log2 intensity)", cex.main = 1)
legend("topright", legend = levels(factor(group_labels)), fill = c("red", "green"))

# Define group factor for Differential Expression analysis
sample_ids <- colnames(combat_expr)
group <- as.factor(colnames(combat_expr))

# Differential Expression Analysis between Control and Disease samples from integrated HTN Datasets
design <- model.matrix(~ group)
fit_merge <- lmFit(combat_expr, design)
fit_merge <- eBayes(fit_merge)
tt_merge <- topTable(fit_merge, coef = 2, number = Inf)
head(tt_merge)

##                      logFC  AveExpr         t      P.Value   adj.P.Val        B
## ENSG00000141959  0.5739213 8.248805  5.658399 4.480841e-07 0.002447263 6.106094
## ENSG00000188986  0.3867320 7.606957  5.638846 4.826440e-07 0.002447263 6.038971
## ENSG00000198843 -0.7490496 8.358957 -5.581782 5.992807e-07 0.002447263 5.843419
## ENSG00000236287 -0.7535935 8.400334 -5.270490 1.930672e-06 0.005603029 4.786467
## ENSG00000086504  0.5092553 7.382942  5.215303 2.370672e-06 0.005603029 4.601014
## ENSG00000023516 -0.8334250 7.071209 -5.149190 3.028967e-06 0.005603029 4.379689

# Annotating Probes using Ensembl
ensembl <- useEnsembl(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")

# Gene annotation of Differential Expression using Ensembl gene IDs
gene_ids <- rownames(tt_merge) 
annot_attributes <- c("ensembl_gene_id", "external_gene_name", "entrezgene_id")
geneDat <- getBM(attributes = annot_attributes,
                 filters = "ensembl_gene_id",
                 values = gene_ids,
                 mart = ensembl)

tt_merge$ensembl_gene_id <- rownames(tt_merge)
tt_annotated <- merge(tt_merge, geneDat, by = "ensembl_gene_id")

tt_annotated <- tt_annotated[, c("ensembl_gene_id", "external_gene_name", "entrezgene_id",
                                 "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B")]

head(tt_annotated)

##   ensembl_gene_id external_gene_name entrezgene_id       logFC  AveExpr
## 1 ENSG00000000003             TSPAN6          7105 -0.07564257 7.362373
## 2 ENSG00000000005               TNMD         64102  0.17519749 3.406249
## 3 ENSG00000000419               DPM1          8813 -0.37268747 8.265136
## 4 ENSG00000000457              SCYL3         57147 -0.32972419 6.901073
## 5 ENSG00000000460              FIRRM         55732 -0.05557569 3.549164
## 6 ENSG00000000938                FGR          2268  0.02495715 6.471479
##            t    P.Value adj.P.Val         B
## 1 -0.3599639 0.72013235 0.8511666 -5.958687
## 2  2.5493071 0.01335577 0.1010704 -3.073799
## 3 -2.1741328 0.03363210 0.1605036 -3.846104
## 4 -2.1753152 0.03353924 0.1601283 -3.843830
## 5 -0.8271405 0.41142009 0.6355648 -5.694962
## 6  0.2038170 0.83918306 0.9203175 -6.000785

Cross-platform integration and Differential Expression analysis of T2DM and HTN transcriptomic profiles

Loranda_Calderon

2025-04-01