Install and load required packages.

# library(devtools)
# install_github('sinhrks/ggfortify')
library(ggfortify)
library(tidyverse)
library(ggpubr)
# BiocManager::install("RNAAgeCalc")
library(RNAAgeCalc)

Load the Behcet disease data.

demo_counts <- read.csv("https://raw.githubusercontent.com/mss-genomics/aging-signature/main/GSE205867_count_matrix.csv") # Read count matrix data from CSV file
rownames(demo_counts) <- demo_counts[,1]  # Assign the first column as row names
demo_counts2 <- demo_counts[,-1]  # Remove the first column (assuming it contains row names)
as_tibble(demo_counts)
## # A tibble: 58,735 × 21
##    X       BD1   BD2   BD3   BD4   BD5   BD6   BD7   BD9  BD10   BD8   HC1   HC2
##    <chr> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
##  1 ENSG…     0     6     0     4     0     0     0     1     6     0     0     2
##  2 ENSG…     0     1     0     0     0     0     0     0     0     0     0     0
##  3 ENSG…   671   652   753   875   662   488   664   776   878   738   903   807
##  4 ENSG…   792   529   614   569   600   561   512   551   560   532   483   675
##  5 ENSG…    79   110    60   107    92    48    62    70    67    61    75   170
##  6 ENSG… 25251 48730 29339 47616 36536 42902 32563 33878 35580 48033 40598 38510
##  7 ENSG…    56    74    50    39    37    65    61   140    36    26     4    16
##  8 ENSG…    57    62    59    64    63    73    98    59    59    90    41    77
##  9 ENSG…   111    93    67   208    93    82   119   131   194    75   116    75
## 10 ENSG…  2664  3218  3289  2340  1451  2427  1611  2110  2105  2707  3382  2984
## # ℹ 58,725 more rows
## # ℹ 8 more variables: HC3 <int>, HC4 <int>, HC5 <int>, HC6 <int>, HC7 <int>,
## #   HC9 <int>, HC10 <int>, HC8 <int>
demo_pheno <- read.csv("https://raw.githubusercontent.com/mss-genomics/aging-signature/main/GSE205867_sample_info.csv") # Read sample information from CSV file
as_tibble(demo_pheno)
## # A tibble: 20 × 13
##    Sample   age Group   Batch Sex    Oral_ulcer Genital_ulcer GI_involvement
##    <chr>  <int> <chr>   <int> <chr>  <chr>      <chr>         <chr>         
##  1 BD1       45 BD          1 male   Yes        Yes           Yes           
##  2 BD2       74 BD          2 male   Yes        Yes           No            
##  3 BD3       55 BD          3 male   Yes        Yes           No            
##  4 BD4       34 BD          4 male   Yes        No            No            
##  5 BD5       35 BD          5 male   Yes        No            No            
##  6 BD6       33 BD          6 male   Yes        No            No            
##  7 BD7       32 BD          7 male   Yes        Yes           No            
##  8 BD8       45 BD          8 male   Yes        Yes           No            
##  9 BD9       41 BD          9 female Yes        No            No            
## 10 BD10      31 BD         10 female Yes        No            No            
## 11 HC1       46 Control     1 male   No         No            No            
## 12 HC2       73 Control     2 male   No         No            No            
## 13 HC3       50 Control     3 male   No         No            No            
## 14 HC4       32 Control     4 male   No         No            No            
## 15 HC5       36 Control     5 male   No         No            No            
## 16 HC6       36 Control     6 male   No         No            No            
## 17 HC7       24 Control     7 male   No         No            No            
## 18 HC8       43 Control     8 male   No         No            No            
## 19 HC9       40 Control     9 female No         No            No            
## 20 HC10      30 Control    10 female No         No            No            
## # ℹ 5 more variables: Erythema_nodule <chr>, Ocular_involvement <chr>,
## #   Vascular_involvement <chr>, CNS_involvement <chr>, Arthritis <chr>

Calculate predicted transcriptional age using 7 different signatures and fit linear regression between predicted age and chronological age for each model.

signatures <- c("DESeq2","Pearson","Dev","deMagalhaes","GenAge","GTExAge","Peters")
for (signature in signatures) {
  demo_clocks <- predict_age(exprdata = demo_counts2, tissue = "blood", 
                                exprtype = "counts", 
                                idtype = "ENSEMBL",
                                signature = signature,
                                chronage = demo_pheno) # Calculate predicted age using RNAAgeCalc package
  names(demo_clocks)[names(demo_clocks) == "RNAAge"] <- "age" # Rename the predicted age column
  lm<-lm(age ~ ChronAge, demo_clocks) # Fit a linear regression model between predicted age and chronological age
  resid<-lm$residuals # Select the residuals of the model
  demo_clocks <- demo_clocks[,-2] # Remove the ChronAge column
  age<-demo_clocks
  demo_pheno <- cbind(demo_pheno,age,resid)
  age <- paste("age",signature,sep="") # Create a name for the predicted age column
  resid <- paste("resid",signature,sep="") # Create a name for the residual column
  colnames(demo_pheno)[ncol(demo_pheno)-1] <- age # Rename predicted age column
  colnames(demo_pheno)[ncol(demo_pheno)] <- resid # Rename residual column
}
as_tibble(demo_pheno)
## # A tibble: 20 × 27
##    Sample   age Group   Batch Sex    Oral_ulcer Genital_ulcer GI_involvement
##    <chr>  <int> <chr>   <int> <chr>  <chr>      <chr>         <chr>         
##  1 BD1       45 BD          1 male   Yes        Yes           Yes           
##  2 BD2       74 BD          2 male   Yes        Yes           No            
##  3 BD3       55 BD          3 male   Yes        Yes           No            
##  4 BD4       34 BD          4 male   Yes        No            No            
##  5 BD5       35 BD          5 male   Yes        No            No            
##  6 BD6       33 BD          6 male   Yes        No            No            
##  7 BD7       32 BD          7 male   Yes        Yes           No            
##  8 BD8       45 BD          8 male   Yes        Yes           No            
##  9 BD9       41 BD          9 female Yes        No            No            
## 10 BD10      31 BD         10 female Yes        No            No            
## 11 HC1       46 Control     1 male   No         No            No            
## 12 HC2       73 Control     2 male   No         No            No            
## 13 HC3       50 Control     3 male   No         No            No            
## 14 HC4       32 Control     4 male   No         No            No            
## 15 HC5       36 Control     5 male   No         No            No            
## 16 HC6       36 Control     6 male   No         No            No            
## 17 HC7       24 Control     7 male   No         No            No            
## 18 HC8       43 Control     8 male   No         No            No            
## 19 HC9       40 Control     9 female No         No            No            
## 20 HC10      30 Control    10 female No         No            No            
## # ℹ 19 more variables: Erythema_nodule <chr>, Ocular_involvement <chr>,
## #   Vascular_involvement <chr>, CNS_involvement <chr>, Arthritis <chr>,
## #   ageDESeq2 <dbl>, residDESeq2 <dbl>, agePearson <dbl>, residPearson <dbl>,
## #   ageDev <dbl>, residDev <dbl>, agedeMagalhaes <dbl>, residdeMagalhaes <dbl>,
## #   ageGenAge <dbl>, residGenAge <dbl>, ageGTExAge <dbl>, residGTExAge <dbl>,
## #   agePeters <dbl>, residPeters <dbl>

Perform principal component analysis for predicted ages calculated using the 7 signatures.

demo_pca <- prcomp(demo_pheno[,c(14,16,18,20,22,24,26)],
                   center = TRUE,
                   scale. = TRUE)  # Perform PCA on a subset of columns in demo_pheno
summary(demo_pca)  # Print summary statistics of the PCA results
## Importance of components:
##                          PC1   PC2    PC3    PC4     PC5     PC6     PC7
## Standard deviation     1.607 1.198 1.0936 0.8468 0.73236 0.60653 0.40624
## Proportion of Variance 0.369 0.205 0.1708 0.1024 0.07662 0.05255 0.02358
## Cumulative Proportion  0.369 0.574 0.7448 0.8472 0.92387 0.97642 1.00000

Plot the PCA results for Behcet disease samples and controls.

demo_pca_plot <- autoplot(demo_pca,
                          data = demo_pheno,
                          colour = 'Group')  # Create a PCA plot using ggfortify package
demo_pca_plot

Compare mean residual from calculated age using GTExAge signature for Behcet disease vs. controls.

# Analysis - T-test plot
ggboxplot(demo_pheno, x = "Group", y = "residGTExAge",
          color = "Group", palette = "jco",
          add = "jitter") + stat_compare_means(method = "t.test")  # Create a boxplot with t-test comparison using ggpubr package

Plot correlation between predicted age calculated using Dev signature and chronological age.

# Analysis - Pearson correlation plot
ggscatter(demo_pheno, x = "age", y = "ageDev",
          add = "reg.line",  # Add regression line
          add.params = list(color = "blue", fill = "lightgray"),  # Customize regression line
          conf.int = TRUE) + stat_cor(method = "pearson", label.x = 3, label.y = 30)  # Create a scatter plot with Pearson correlation using ggpub