Install and load required packages.
# library(devtools)
# install_github('sinhrks/ggfortify')
library(ggfortify)
library(tidyverse)
library(ggpubr)
# BiocManager::install("RNAAgeCalc")
library(RNAAgeCalc)
Load the Behcet disease data.
demo_counts <- read.csv("https://raw.githubusercontent.com/mss-genomics/aging-signature/main/GSE205867_count_matrix.csv") # Read count matrix data from CSV file
rownames(demo_counts) <- demo_counts[,1] # Assign the first column as row names
demo_counts2 <- demo_counts[,-1] # Remove the first column (assuming it contains row names)
as_tibble(demo_counts)
## # A tibble: 58,735 × 21
## X BD1 BD2 BD3 BD4 BD5 BD6 BD7 BD9 BD10 BD8 HC1 HC2
## <chr> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
## 1 ENSG… 0 6 0 4 0 0 0 1 6 0 0 2
## 2 ENSG… 0 1 0 0 0 0 0 0 0 0 0 0
## 3 ENSG… 671 652 753 875 662 488 664 776 878 738 903 807
## 4 ENSG… 792 529 614 569 600 561 512 551 560 532 483 675
## 5 ENSG… 79 110 60 107 92 48 62 70 67 61 75 170
## 6 ENSG… 25251 48730 29339 47616 36536 42902 32563 33878 35580 48033 40598 38510
## 7 ENSG… 56 74 50 39 37 65 61 140 36 26 4 16
## 8 ENSG… 57 62 59 64 63 73 98 59 59 90 41 77
## 9 ENSG… 111 93 67 208 93 82 119 131 194 75 116 75
## 10 ENSG… 2664 3218 3289 2340 1451 2427 1611 2110 2105 2707 3382 2984
## # ℹ 58,725 more rows
## # ℹ 8 more variables: HC3 <int>, HC4 <int>, HC5 <int>, HC6 <int>, HC7 <int>,
## # HC9 <int>, HC10 <int>, HC8 <int>
demo_pheno <- read.csv("https://raw.githubusercontent.com/mss-genomics/aging-signature/main/GSE205867_sample_info.csv") # Read sample information from CSV file
as_tibble(demo_pheno)
## # A tibble: 20 × 13
## Sample age Group Batch Sex Oral_ulcer Genital_ulcer GI_involvement
## <chr> <int> <chr> <int> <chr> <chr> <chr> <chr>
## 1 BD1 45 BD 1 male Yes Yes Yes
## 2 BD2 74 BD 2 male Yes Yes No
## 3 BD3 55 BD 3 male Yes Yes No
## 4 BD4 34 BD 4 male Yes No No
## 5 BD5 35 BD 5 male Yes No No
## 6 BD6 33 BD 6 male Yes No No
## 7 BD7 32 BD 7 male Yes Yes No
## 8 BD8 45 BD 8 male Yes Yes No
## 9 BD9 41 BD 9 female Yes No No
## 10 BD10 31 BD 10 female Yes No No
## 11 HC1 46 Control 1 male No No No
## 12 HC2 73 Control 2 male No No No
## 13 HC3 50 Control 3 male No No No
## 14 HC4 32 Control 4 male No No No
## 15 HC5 36 Control 5 male No No No
## 16 HC6 36 Control 6 male No No No
## 17 HC7 24 Control 7 male No No No
## 18 HC8 43 Control 8 male No No No
## 19 HC9 40 Control 9 female No No No
## 20 HC10 30 Control 10 female No No No
## # ℹ 5 more variables: Erythema_nodule <chr>, Ocular_involvement <chr>,
## # Vascular_involvement <chr>, CNS_involvement <chr>, Arthritis <chr>
Calculate predicted transcriptional age using 7 different signatures and fit linear regression between predicted age and chronological age for each model.
signatures <- c("DESeq2","Pearson","Dev","deMagalhaes","GenAge","GTExAge","Peters")
for (signature in signatures) {
demo_clocks <- predict_age(exprdata = demo_counts2, tissue = "blood",
exprtype = "counts",
idtype = "ENSEMBL",
signature = signature,
chronage = demo_pheno) # Calculate predicted age using RNAAgeCalc package
names(demo_clocks)[names(demo_clocks) == "RNAAge"] <- "age" # Rename the predicted age column
lm<-lm(age ~ ChronAge, demo_clocks) # Fit a linear regression model between predicted age and chronological age
resid<-lm$residuals # Select the residuals of the model
demo_clocks <- demo_clocks[,-2] # Remove the ChronAge column
age<-demo_clocks
demo_pheno <- cbind(demo_pheno,age,resid)
age <- paste("age",signature,sep="") # Create a name for the predicted age column
resid <- paste("resid",signature,sep="") # Create a name for the residual column
colnames(demo_pheno)[ncol(demo_pheno)-1] <- age # Rename predicted age column
colnames(demo_pheno)[ncol(demo_pheno)] <- resid # Rename residual column
}
as_tibble(demo_pheno)
## # A tibble: 20 × 27
## Sample age Group Batch Sex Oral_ulcer Genital_ulcer GI_involvement
## <chr> <int> <chr> <int> <chr> <chr> <chr> <chr>
## 1 BD1 45 BD 1 male Yes Yes Yes
## 2 BD2 74 BD 2 male Yes Yes No
## 3 BD3 55 BD 3 male Yes Yes No
## 4 BD4 34 BD 4 male Yes No No
## 5 BD5 35 BD 5 male Yes No No
## 6 BD6 33 BD 6 male Yes No No
## 7 BD7 32 BD 7 male Yes Yes No
## 8 BD8 45 BD 8 male Yes Yes No
## 9 BD9 41 BD 9 female Yes No No
## 10 BD10 31 BD 10 female Yes No No
## 11 HC1 46 Control 1 male No No No
## 12 HC2 73 Control 2 male No No No
## 13 HC3 50 Control 3 male No No No
## 14 HC4 32 Control 4 male No No No
## 15 HC5 36 Control 5 male No No No
## 16 HC6 36 Control 6 male No No No
## 17 HC7 24 Control 7 male No No No
## 18 HC8 43 Control 8 male No No No
## 19 HC9 40 Control 9 female No No No
## 20 HC10 30 Control 10 female No No No
## # ℹ 19 more variables: Erythema_nodule <chr>, Ocular_involvement <chr>,
## # Vascular_involvement <chr>, CNS_involvement <chr>, Arthritis <chr>,
## # ageDESeq2 <dbl>, residDESeq2 <dbl>, agePearson <dbl>, residPearson <dbl>,
## # ageDev <dbl>, residDev <dbl>, agedeMagalhaes <dbl>, residdeMagalhaes <dbl>,
## # ageGenAge <dbl>, residGenAge <dbl>, ageGTExAge <dbl>, residGTExAge <dbl>,
## # agePeters <dbl>, residPeters <dbl>
Perform principal component analysis for predicted ages calculated using the 7 signatures.
demo_pca <- prcomp(demo_pheno[,c(14,16,18,20,22,24,26)],
center = TRUE,
scale. = TRUE) # Perform PCA on a subset of columns in demo_pheno
summary(demo_pca) # Print summary statistics of the PCA results
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 1.607 1.198 1.0936 0.8468 0.73236 0.60653 0.40624
## Proportion of Variance 0.369 0.205 0.1708 0.1024 0.07662 0.05255 0.02358
## Cumulative Proportion 0.369 0.574 0.7448 0.8472 0.92387 0.97642 1.00000
Plot the PCA results for Behcet disease samples and controls.
demo_pca_plot <- autoplot(demo_pca,
data = demo_pheno,
colour = 'Group') # Create a PCA plot using ggfortify package
demo_pca_plot
Compare mean residual from calculated age using GTExAge signature for Behcet disease vs. controls.
# Analysis - T-test plot
ggboxplot(demo_pheno, x = "Group", y = "residGTExAge",
color = "Group", palette = "jco",
add = "jitter") + stat_compare_means(method = "t.test") # Create a boxplot with t-test comparison using ggpubr package
Plot correlation between predicted age calculated using Dev signature and chronological age.
# Analysis - Pearson correlation plot
ggscatter(demo_pheno, x = "age", y = "ageDev",
add = "reg.line", # Add regression line
add.params = list(color = "blue", fill = "lightgray"), # Customize regression line
conf.int = TRUE) + stat_cor(method = "pearson", label.x = 3, label.y = 30) # Create a scatter plot with Pearson correlation using ggpub