library(biomaRt)
library(ensembldb)
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: GenomicFeatures
## Loading required package: AnnotationDbi
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: AnnotationFilter
## 
## Attaching package: 'ensembldb'
## The following object is masked from 'package:stats':
## 
##     filter
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:ensembldb':
## 
##     filter, select
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following objects are masked from 'package:GenomicRanges':
## 
##     intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
## 
##     intersect
## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## The following object is masked from 'package:biomaRt':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(viper)      
library(dorothea)
# Load the gene expression dataset for T2DM
T2DMdata <- read.csv("/Users/lorandacalderonzamora/Downloads/datExpr_integrative_T2DM.csv", row.names = NULL)
colnames(T2DMdata)[1] <- "ensembl_gene_id"
rownames(T2DMdata) <- T2DMdata$ensembl_gene_id
T2DMdata <- T2DMdata[, -1]
# Retrieving Gene Annotation from Ensembl with biomaRt
ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
getinfo <- c("ensembl_gene_id", "entrezgene_id", "external_gene_name")

geneDat <- getBM(attributes = getinfo,
                 filters = "ensembl_gene_id",
                 values = rownames(T2DMdata),
                 mart = ensembl)
# Prepare Expression Matrix with Unique Gene Names
T2DMdata$ensembl_gene_id <- rownames(T2DMdata)
expr_matrix <- merge(T2DMdata, geneDat, by = "ensembl_gene_id", all.x = TRUE)
expr_matrix <- expr_matrix[!duplicated(expr_matrix$external_gene_name), ]

expr_matrix_numeric <- expr_matrix[, grep("GSM", colnames(expr_matrix))]

expr_matrix_unique <- aggregate(expr_matrix_numeric,
                                by = list(expr_matrix$external_gene_name),
                                FUN = mean)

colnames(expr_matrix_unique)[1] <- "external_gene_name"
rownames(expr_matrix_unique) <- expr_matrix_unique$external_gene_name
expr_matrix_unique <- expr_matrix_unique[, -1]
# Infer Transcription Factor Activity Using DoRothEA and VIPER
data(dorothea_hs, package = "dorothea")

dorothea_regulons <- dorothea_hs %>%
  filter(confidence %in% c("A", "B"))

table(dorothea_regulons$target %in% rownames(expr_matrix_unique))
## 
## FALSE  TRUE 
##   665  5743
my_regulon <- dorothea_regulons %>%
  group_by(tf) %>%
  summarise(
    targets = list(setNames(mor, target)),
    .groups = "drop"
  )
my_regulon <- setNames(my_regulon$targets, my_regulon$tf)

my_regulon_viper <- lapply(my_regulon, function(targets) {
  list(tfmode = targets, likelihood = rep(1, length(targets)))
})

tf_activity <- viper(eset = as.matrix(expr_matrix_unique),
                     regulon = my_regulon_viper,
                     method = "scale",
                     nes = TRUE)
## 
## Computing the association scores
## Computing regulons enrichment with aREA
##   |                                                                              |                                                                      |   0%  |                                                                              |=                                                                     |   2%  |                                                                              |===                                                                   |   4%  |                                                                              |====                                                                  |   6%  |                                                                              |======                                                                |   8%  |                                                                              |=======                                                               |  10%  |                                                                              |=========                                                             |  12%  |                                                                              |==========                                                            |  14%  |                                                                              |===========                                                           |  16%  |                                                                              |=============                                                         |  18%  |                                                                              |==============                                                        |  20%  |                                                                              |================                                                      |  22%  |                                                                              |=================                                                     |  24%  |                                                                              |===================                                                   |  27%  |                                                                              |====================                                                  |  29%  |                                                                              |=====================                                                 |  31%  |                                                                              |=======================                                               |  33%  |                                                                              |========================                                              |  35%  |                                                                              |==========================                                            |  37%  |                                                                              |===========================                                           |  39%  |                                                                              |=============================                                         |  41%  |                                                                              |==============================                                        |  43%  |                                                                              |===============================                                       |  45%  |                                                                              |=================================                                     |  47%  |                                                                              |==================================                                    |  49%  |                                                                              |====================================                                  |  51%  |                                                                              |=====================================                                 |  53%  |                                                                              |=======================================                               |  55%  |                                                                              |========================================                              |  57%  |                                                                              |=========================================                             |  59%  |                                                                              |===========================================                           |  61%  |                                                                              |============================================                          |  63%  |                                                                              |==============================================                        |  65%  |                                                                              |===============================================                       |  67%  |                                                                              |=================================================                     |  69%  |                                                                              |==================================================                    |  71%  |                                                                              |===================================================                   |  73%  |                                                                              |=====================================================                 |  76%  |                                                                              |======================================================                |  78%  |                                                                              |========================================================              |  80%  |                                                                              |=========================================================             |  82%  |                                                                              |===========================================================           |  84%  |                                                                              |============================================================          |  86%  |                                                                              |=============================================================         |  88%  |                                                                              |===============================================================       |  90%  |                                                                              |================================================================      |  92%  |                                                                              |==================================================================    |  94%  |                                                                              |===================================================================   |  96%  |                                                                              |===================================================================== |  98%  |                                                                              |======================================================================| 100%
# Assign Sample Group Labels
group_labels <- rep(NA, ncol(tf_activity))
names(group_labels) <- colnames(tf_activity)

group_labels[names(group_labels) %in% c(
  "GSM631755", "GSM631756", "GSM631757", "GSM631758", "GSM631759", "GSM631760", "GSM631761",
  "GSM524151", "GSM524152", "GSM524153", "GSM524154", "GSM524155", "GSM524156", "GSM524157", 
  "GSM524158", "GSM524159", "GSM524160"
)] <- "CTL"

group_labels[names(group_labels) %in% c(
  "GSM631762", "GSM631763", "GSM631764", "GSM631765", "GSM631766", "GSM631767",
  "GSM524161", "GSM524162", "GSM524163", "GSM524164", "GSM524165", "GSM524166",
  "GSM524167", "GSM524168", "GSM524169", "GSM524170"
)] <- "Disease"

table(group_labels)
## group_labels
##     CTL Disease 
##      17      16
ctl_T2DM <- which(group_labels == "CTL")
disease_T2DM <- which(group_labels == "Disease")
# Differential TF Activity Between Disease and Control Groups
tf_means <- data.frame(
  TF = rownames(tf_activity),
  Disease = rowMeans(tf_activity[, disease_T2DM]),
  CTL = rowMeans(tf_activity[, ctl_T2DM]),
  stringsAsFactors = FALSE
)

p_values <- apply(tf_activity, 1, function(x) {
  t.test(x[disease_T2DM], x[ctl_T2DM])$p.value
})

tf_means$p_value <- p_values
tf_means <- tf_means[order(tf_means$p_value), ]
print(tf_means)
##            TF      Disease         CTL      p_value
## CEBPA   CEBPA  0.698850736 -0.70142588 1.301703e-05
## E2F4     E2F4 -1.187091583  1.45538579 2.022536e-04
## PRDM14 PRDM14  1.215797325 -1.44446500 5.003614e-04
## FOXP1   FOXP1  1.470917480 -1.67232364 1.925676e-03
## FOXO3   FOXO3 -0.934824225  0.87468059 2.148529e-03
## E2F1     E2F1 -0.811292307  0.92477009 2.250983e-03
## SP1       SP1  0.722834000 -0.77225565 2.588342e-03
## ZNF263 ZNF263 -1.714646306  2.01208813 4.638144e-03
## FOXO1   FOXO1 -0.520806004  0.51319863 4.823330e-03
## USF1     USF1  0.541532097 -0.64678039 6.104274e-03
## ESR1     ESR1  0.579991594 -0.65968011 1.035338e-02
## SMAD4   SMAD4  0.725200792 -0.72571077 1.193440e-02
## SPI1     SPI1  0.883302108 -0.95543100 1.570274e-02
## SP3       SP3  0.496521926 -0.61759743 2.186484e-02
## HNF4A   HNF4A  0.229460610 -0.33426699 2.753137e-02
## CEBPB   CEBPB  0.691579158 -0.52626101 3.853148e-02
## EGR1     EGR1  0.420582097 -0.38435672 4.587560e-02
## STAT2   STAT2  0.668103800 -0.59336775 4.890044e-02
## USF2     USF2  0.275138070 -0.28345258 5.392394e-02
## NFKB1   NFKB1  1.000481301 -0.90827417 5.514195e-02
## RELA     RELA  0.839829462 -0.80487637 5.764470e-02
## MYC       MYC -0.548489739  0.72965959 6.143308e-02
## POU2F1 POU2F1  0.248564021 -0.17529209 6.342470e-02
## CREB1   CREB1  0.291543068 -0.23842166 9.574464e-02
## FOXA1   FOXA1  0.404320732 -0.28102497 1.066292e-01
## RARA     RARA  0.307959426 -0.36115339 1.101666e-01
## HIF1A   HIF1A -0.395263528  0.54208084 1.614610e-01
## JUN       JUN  0.572433417 -0.48648973 1.928151e-01
## MITF     MITF -0.174366871  0.22371904 1.942280e-01
## STAT3   STAT3  0.426223277 -0.26600680 2.017725e-01
## WT1       WT1 -0.192045041  0.20279342 2.267090e-01
## YY1       YY1  0.157720689 -0.19639103 2.419266e-01
## FOS       FOS  0.333550145 -0.20980971 2.938933e-01
## IRF1     IRF1  0.294117844 -0.11282402 3.104308e-01
## GATA2   GATA2  0.165607310 -0.31099269 3.357450e-01
## TFAP2A TFAP2A  0.138567194 -0.12440554 3.695835e-01
## ETS2     ETS2  0.154522286 -0.15102815 3.909946e-01
## ETS1     ETS1  0.311731933 -0.15718867 4.305963e-01
## SMAD3   SMAD3  0.273632256 -0.12745483 4.523862e-01
## PPARA   PPARA -0.043789965  0.11157770 5.204151e-01
## GATA3   GATA3 -0.088361260  0.07587451 5.891638e-01
## AR         AR  0.143489046 -0.07548272 6.280410e-01
## PPARG   PPARG -0.077238438  0.06370762 6.985112e-01
## STAT1   STAT1  0.151063758 -0.04814787 7.095594e-01
## ELK1     ELK1  0.071563697 -0.01550449 8.071984e-01
## CTCF     CTCF -0.024629716  0.02533019 8.782196e-01
## NFIC     NFIC  0.023764280  0.04991605 8.821280e-01
## FOXM1   FOXM1 -0.008024378 -0.05414852 9.165742e-01
## TP53     TP53  0.007245159  0.02658336 9.427124e-01