library(biomaRt)
library(ensembldb)
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: GenomicFeatures
## Loading required package: AnnotationDbi
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: AnnotationFilter
## 
## Attaching package: 'ensembldb'
## The following object is masked from 'package:stats':
## 
##     filter
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:ensembldb':
## 
##     filter, select
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following objects are masked from 'package:GenomicRanges':
## 
##     intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
## 
##     intersect
## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## The following object is masked from 'package:biomaRt':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(viper)      
library(dorothea)
# Load the gene expression dataset for HTN
HTNdata <- read.csv("/Users/lorandacalderonzamora/Downloads/datExpr_integrative_HTN.csv", row.names = NULL)
colnames(HTNdata)[1] <- "ensembl_gene_id"
rownames(HTNdata) <- HTNdata$ensembl_gene_id
HTNdata <- HTNdata[, -1]
# Retrieving Gene Annotation from Ensembl with biomaRt
ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
getinfo <- c("ensembl_gene_id", "entrezgene_id", "external_gene_name")

geneDat <- getBM(attributes = getinfo,
                 filters = "ensembl_gene_id",
                 values = rownames(HTNdata),
                 mart = ensembl)
# Prepare Expression Matrix with Unique Gene Names
HTNdata$ensembl_gene_id <- rownames(HTNdata)
expr_matrix <- merge(HTNdata, geneDat, by = "ensembl_gene_id", all.x = TRUE)
expr_matrix <- expr_matrix[!duplicated(expr_matrix$external_gene_name), ]

expr_matrix_numeric <- expr_matrix[, grep("GSM", colnames(expr_matrix))]

expr_matrix_unique <- aggregate(expr_matrix_numeric,
                                by = list(expr_matrix$external_gene_name),
                                FUN = mean)

colnames(expr_matrix_unique)[1] <- "external_gene_name"
rownames(expr_matrix_unique) <- expr_matrix_unique$external_gene_name
expr_matrix_unique <- expr_matrix_unique[, -1]
# Infer Transcription Factor Activity Using DoRothEA and VIPER
data(dorothea_hs, package = "dorothea")

dorothea_regulons <- dorothea_hs %>%
  filter(confidence %in% c("A", "B"))

table(dorothea_regulons$target %in% rownames(expr_matrix_unique))
## 
## FALSE  TRUE 
##   316  6092
my_regulon <- dorothea_regulons %>%
  group_by(tf) %>%
  summarise(
    targets = list(setNames(mor, target)),
    .groups = "drop"
  )
my_regulon <- setNames(my_regulon$targets, my_regulon$tf)

my_regulon_viper <- lapply(my_regulon, function(targets) {
  list(tfmode = targets, likelihood = rep(1, length(targets)))
})

tf_activity <- viper(eset = as.matrix(expr_matrix_unique),
                     regulon = my_regulon_viper,
                     method = "scale",
                     nes = TRUE)
## 
## Computing the association scores
## Computing regulons enrichment with aREA
##   |                                                                              |                                                                      |   0%  |                                                                              |=                                                                     |   2%  |                                                                              |===                                                                   |   4%  |                                                                              |====                                                                  |   6%  |                                                                              |=====                                                                 |   8%  |                                                                              |=======                                                               |  10%  |                                                                              |========                                                              |  12%  |                                                                              |==========                                                            |  14%  |                                                                              |===========                                                           |  16%  |                                                                              |============                                                          |  18%  |                                                                              |==============                                                        |  20%  |                                                                              |===============                                                       |  22%  |                                                                              |================                                                      |  24%  |                                                                              |==================                                                    |  25%  |                                                                              |===================                                                   |  27%  |                                                                              |=====================                                                 |  29%  |                                                                              |======================                                                |  31%  |                                                                              |=======================                                               |  33%  |                                                                              |=========================                                             |  35%  |                                                                              |==========================                                            |  37%  |                                                                              |===========================                                           |  39%  |                                                                              |=============================                                         |  41%  |                                                                              |==============================                                        |  43%  |                                                                              |================================                                      |  45%  |                                                                              |=================================                                     |  47%  |                                                                              |==================================                                    |  49%  |                                                                              |====================================                                  |  51%  |                                                                              |=====================================                                 |  53%  |                                                                              |======================================                                |  55%  |                                                                              |========================================                              |  57%  |                                                                              |=========================================                             |  59%  |                                                                              |===========================================                           |  61%  |                                                                              |============================================                          |  63%  |                                                                              |=============================================                         |  65%  |                                                                              |===============================================                       |  67%  |                                                                              |================================================                      |  69%  |                                                                              |=================================================                     |  71%  |                                                                              |===================================================                   |  73%  |                                                                              |====================================================                  |  75%  |                                                                              |======================================================                |  76%  |                                                                              |=======================================================               |  78%  |                                                                              |========================================================              |  80%  |                                                                              |==========================================================            |  82%  |                                                                              |===========================================================           |  84%  |                                                                              |============================================================          |  86%  |                                                                              |==============================================================        |  88%  |                                                                              |===============================================================       |  90%  |                                                                              |=================================================================     |  92%  |                                                                              |==================================================================    |  94%  |                                                                              |===================================================================   |  96%  |                                                                              |===================================================================== |  98%  |                                                                              |======================================================================| 100%
# Assign Sample Group Labels
group_labels <- rep(NA, ncol(tf_activity))
names(group_labels) <- colnames(tf_activity)

group_labels[names(group_labels) %in% c(
  "GSM700797", "GSM700798", "GSM609528", "GSM609529", 
                                 "GSM609530", "GSM701161", "GSM701162", "GSM701163", "GSM701164", "GSM701165")] <- "CTL"

group_labels[names(group_labels) %in% c(
  "GSM700799", "GSM700800", "GSM700801", "GSM700802", "GSM700803", 
                                 "GSM609526", "GSM609527", "GSM701166", "GSM701167", "GSM701168", "GSM701169", "GSM701170", "GSM701171", "GSM701172", "GSM701173", "GSM701174")] <- "Disease"

table(group_labels)
## group_labels
##     CTL Disease 
##      10      15
ctl_HTN <- which(group_labels == "CTL")
disease_HTN <- which(group_labels == "Disease")
# Differential TF Activity Between Disease and Control Groups
tf_means <- data.frame(
  TF = rownames(tf_activity),
  Disease = rowMeans(tf_activity[, disease_HTN]),
  CTL = rowMeans(tf_activity[, ctl_HTN]),
  stringsAsFactors = FALSE
)

p_values <- apply(tf_activity, 1, function(x) {
  t.test(x[disease_HTN], x[ctl_HTN])$p.value
})

tf_means$p_value <- p_values
tf_means <- tf_means[order(tf_means$p_value), ]
print(tf_means)
##            TF      Disease         CTL     p_value
## MYC       MYC  1.029742389 -1.14035336 0.002821204
## IRF1     IRF1 -0.928736717  0.72438174 0.009246005
## STAT1   STAT1 -0.896676487  1.04482428 0.022539370
## BACH1   BACH1 -0.248729768  0.21865482 0.029985910
## AR         AR  0.900256432 -0.46222860 0.030254039
## STAT2   STAT2 -1.490591239  1.01993589 0.034874604
## NFIC     NFIC  0.744533939 -0.43181826 0.037941489
## GATA2   GATA2 -0.461924247  0.48767944 0.040654444
## HNF4A   HNF4A  1.323430160 -1.15445149 0.041778702
## FOXO1   FOXO1  0.438166463 -0.36166742 0.056233137
## PPARG   PPARG  0.359368297 -0.38772707 0.060037088
## GATA3   GATA3 -0.332275092  0.60364116 0.066342239
## TP53     TP53 -0.691140901  0.50805048 0.069121015
## WT1       WT1  0.192267951 -0.27997871 0.073211175
## SPI1     SPI1 -1.841172627  0.98363748 0.090743376
## HIF1A   HIF1A  0.356348628 -0.34023163 0.102636419
## CEBPA   CEBPA  0.529175788 -0.24023987 0.106596103
## PPARA   PPARA  0.487527213 -0.39163912 0.113476100
## SP1       SP1  0.535161708 -0.55748601 0.123958356
## SP3       SP3  0.715294515 -0.40872436 0.125272098
## MYB       MYB -0.221979900  0.35824747 0.143838781
## EGR1     EGR1  0.474368572 -0.24987015 0.175151216
## FOS       FOS -0.332814065  0.21245048 0.221360254
## ELK1     ELK1  0.520370522 -0.31876189 0.222818440
## YY1       YY1 -0.190711039  0.21810612 0.231385040
## FOXM1   FOXM1 -0.416488278  0.07880612 0.237578888
## RARA     RARA  0.142396729 -0.30784804 0.270798437
## ETS1     ETS1 -0.366266858  0.30905933 0.279268538
## E2F1     E2F1 -0.392937899  0.41602611 0.293166454
## FOXA1   FOXA1  0.124729884 -0.47864930 0.313853545
## USF2     USF2  0.263275895 -0.18494540 0.346051962
## SMAD3   SMAD3  0.298984226 -0.26612197 0.349145161
## FOXP1   FOXP1  0.452709239 -1.00362895 0.353837871
## E2F4     E2F4 -0.453794782  0.43408083 0.357646234
## USF1     USF1  0.319455325 -0.16497559 0.379465963
## RELA     RELA -0.616430116  0.19151953 0.380283449
## NFKB1   NFKB1 -0.703150942  0.19164807 0.417006723
## ESR1     ESR1  0.151609778 -0.14858913 0.462089307
## SMAD4   SMAD4  0.145044060 -0.27546887 0.475480130
## ZNF263 ZNF263 -0.209024929  0.66420583 0.616637061
## TFAP2A TFAP2A -0.044418168 -0.25914268 0.630972607
## CTCF     CTCF -0.122130992  0.05903300 0.689433730
## JUN       JUN  0.017447363 -0.14068631 0.710214390
## MITF     MITF  0.215980868  0.01702302 0.721071007
## CREB1   CREB1  0.189319781  0.04037621 0.742895671
## STAT3   STAT3 -0.293572629 -0.08957053 0.768523283
## CEBPB   CEBPB -0.089378855 -0.19850562 0.881257257
## PRDM14 PRDM14  0.002986301  0.13270540 0.900912393
## ETS2     ETS2 -0.077839774 -0.03185588 0.911361887
## POU2F1 POU2F1  0.038870443  0.05188478 0.964805236
## FOXO3   FOXO3 -0.141510266 -0.14114323 0.999296673