The DNA methylation state of a tissue can be used to estimate its mitotic age (i.e., the number of cell divisions that a cell lineage has accrued). In a number of complex diseases (e.g., cardiovascular disease, metabolic syndrome, cancer), the mitotic age of affected tissues appears older than that of healthy samples, in which mitotic age is proportional to chronological age. This epigenetic signature could therefore be used as an early predictor of these diseases. Here we analyse the mitotic age of colorectal cancer and healthy intestinal mucosa samples, using a subset of a large genome-wide DNA methylation profiling dataset (GEO accession GSE101764). We start by loading the required packages and importing the dataset. The cg IDs denote the individual CpG sites (array probes) in the genome for which the methylation status was measured.

#Load packages
library(tidyverse)
library(methylCIPHER)
library(ggpubr)

# Import and tidy dataset ----
# Sample annotation table: one row per sample (Sample, Age, Sex, Group).
CRC_pheno <- read.csv(
  "https://raw.githubusercontent.com/mss-genomics/cancer-epigenetics/main/CRC_samples_1.csv"
)

# Methylation values: the first column of the file is used as row names,
# leaving one column per sample.
CRC_methylation <- read.csv(
  "https://raw.githubusercontent.com/mss-genomics/cancer-epigenetics/main/CRC_filtered.csv",
  header = TRUE,
  row.names = 1
)

# Transpose so that rows are samples and columns are CpGs.
CRC_beta_inverse <- t(CRC_methylation)

# The mitotic-age estimates computed from this matrix are later attached to
# CRC_pheno (presumably row by row, by position -- confirm against the
# methylCIPHER documentation). Fail loudly if the two tables do not list the
# same samples in the same order, rather than silently mislabelling samples.
if (!identical(rownames(CRC_beta_inverse), as.character(CRC_pheno$Sample))) {
  stop(
    "Sample IDs in the methylation matrix do not match CRC_pheno$Sample ",
    "(same samples in the same order are required).",
    call. = FALSE
  )
}

as_tibble(CRC_pheno)
## # A tibble: 102 × 4
##    Sample       Age Sex   Group 
##    <chr>      <int> <chr> <chr> 
##  1 GSM2714567    86 M     mucosa
##  2 GSM2714568    69 M     mucosa
##  3 GSM2714569    58 M     tumor 
##  4 GSM2714570    72 M     tumor 
##  5 GSM2714571    58 M     mucosa
##  6 GSM2714572    72 M     mucosa
##  7 GSM2714573    79 F     tumor 
##  8 GSM2714574    76 M     tumor 
##  9 GSM2714575    79 F     mucosa
## 10 GSM2714576    76 M     mucosa
## # ℹ 92 more rows
# Preview the samples x CpGs matrix as a tibble, moving the sample IDs from
# the row names into an explicit "Sample" column.
CRC_beta_inverse %>%
  as.data.frame() %>%
  rownames_to_column(var = "Sample") %>%
  as_tibble()
## # A tibble: 102 × 164
##    Sample     cg00043095 cg00347369 cg00397986 cg00466268 cg00884606 cg00916884
##    <chr>           <dbl>      <dbl>      <dbl>      <dbl>      <dbl>      <dbl>
##  1 GSM2714567     0.0797      0.209      0.207      0.178     0.0955     0.134 
##  2 GSM2714568     0.0619      0.209      0.157      0.229     0.129      0.206 
##  3 GSM2714569     0.0438      0.288      0.418      0.171     0.0956     0.123 
##  4 GSM2714570     0.0681      0.557      0.765      0.375     0.0607     0.630 
##  5 GSM2714571     0.0575      0.220      0.108      0.250     0.117      0.129 
##  6 GSM2714572     0.0821      0.270      0.135      0.233     0.0946     0.104 
##  7 GSM2714573     0.0534      0.316      0.607      0.142     0.0706     0.0507
##  8 GSM2714574     0.0642      0.191      0.394      0.183     0.0996     0.0780
##  9 GSM2714575     0.0666      0.240      0.269      0.238     0.141      0.300 
## 10 GSM2714576     0.0555      0.292      0.243      0.295     0.157      0.330 
## # ℹ 92 more rows
## # ℹ 157 more variables: cg01435574 <dbl>, cg01537995 <dbl>, cg01587896 <dbl>,
## #   cg01699217 <dbl>, cg01783070 <dbl>, cg01830294 <dbl>, cg02150988 <dbl>,
## #   cg02186542 <dbl>, cg02266732 <dbl>, cg02631468 <dbl>, cg02726121 <dbl>,
## #   cg02796545 <dbl>, cg02964724 <dbl>, cg03045635 <dbl>, cg03111498 <dbl>,
## #   cg03140968 <dbl>, cg03181582 <dbl>, cg03430846 <dbl>, cg03450948 <dbl>,
## #   cg03603951 <dbl>, cg03874199 <dbl>, cg04188273 <dbl>, cg04408488 <dbl>, …

We calculate the mitotic age (in total cell divisions) of each sample using the epiTOC2 model implemented in the methylCIPHER R package.

# Estimate the epiTOC2 mitotic age of every sample. Passing the phenotype
# table as the second argument returns it with an added `epiTOC2` column.
# `TRUE` is spelled out because the shorthand `T` is an ordinary variable
# that can be reassigned.
CRC_output <- calcEpiTOC2(CRC_beta_inverse, CRC_pheno, imputation = TRUE)
as_tibble(CRC_output)
## # A tibble: 102 × 5
##    Sample       Age Sex   Group  epiTOC2
##    <chr>      <int> <chr> <chr>    <dbl>
##  1 GSM2714567    86 M     mucosa   6764.
##  2 GSM2714568    69 M     mucosa   6982.
##  3 GSM2714569    58 M     tumor   12032.
##  4 GSM2714570    72 M     tumor   22947.
##  5 GSM2714571    58 M     mucosa   6770.
##  6 GSM2714572    72 M     mucosa   6733.
##  7 GSM2714573    79 F     tumor   15555.
##  8 GSM2714574    76 M     tumor   13790.
##  9 GSM2714575    79 F     mucosa  10957.
## 10 GSM2714576    76 M     mucosa  10602.
## # ℹ 92 more rows

We then calculate the age-adjusted epiTOC2 mitotic age (in annual cell divisions) by dividing each sample's total number of cell divisions by its chronological age.

# Age-adjusted mitotic age: total estimated cell divisions divided by
# chronological age, i.e. divisions per year.
IRepiTOC2 <- CRC_output$epiTOC2 / CRC_output$Age

# Assign the column by name instead of cbind(): re-running this chunk then
# overwrites IRepiTOC2 rather than appending a duplicate column.
CRC_output$IRepiTOC2 <- IRepiTOC2
as_tibble(CRC_output)
## # A tibble: 102 × 6
##    Sample       Age Sex   Group  epiTOC2 IRepiTOC2
##    <chr>      <int> <chr> <chr>    <dbl>     <dbl>
##  1 GSM2714567    86 M     mucosa   6764.      78.6
##  2 GSM2714568    69 M     mucosa   6982.     101. 
##  3 GSM2714569    58 M     tumor   12032.     207. 
##  4 GSM2714570    72 M     tumor   22947.     319. 
##  5 GSM2714571    58 M     mucosa   6770.     117. 
##  6 GSM2714572    72 M     mucosa   6733.      93.5
##  7 GSM2714573    79 F     tumor   15555.     197. 
##  8 GSM2714574    76 M     tumor   13790.     181. 
##  9 GSM2714575    79 F     mucosa  10957.     139. 
## 10 GSM2714576    76 M     mucosa  10602.     139. 
## # ℹ 92 more rows

We calculate the Pearson correlation of mitotic age with chronological age for each group. We note that the mitotic age is significantly correlated with chronological age in healthy mucosa but not in colorectal cancer samples.

# Scatter plot of total mitotic age against chronological age, coloured by
# group, with a regression line, its confidence band and a Pearson r label
# for each group.
ggscatter(
  CRC_output,
  x = "Age",
  y = "epiTOC2",
  color = "Group",
  palette = c("darkred", "seashell4"),
  size = 4,
  add = "reg.line",
  conf.int = TRUE
) +
  theme_minimal() +
  labs(
    x = "Chronological Age (Years)",
    y = "Mitotic Age (Total Cell Divisions)"
  ) +
  stat_cor(
    mapping = aes(color = Group),
    method = "pearson",
    cor.coef.name = "r",
    size = 6,
    label.x = 20,
    label.y.npc = 0.37
  ) +
  # Applied after theme_minimal() so these settings are not reset by it.
  theme(
    legend.position = "right",
    axis.title.x = element_text(size = 18),
    axis.text.x = element_text(size = 13),
    axis.title.y = element_text(size = 18),
    axis.text.y = element_text(size = 13)
  )

We compare the average age-adjusted mitotic age (annual cell divisions) between colorectal cancer and healthy mucosa samples and use a t-test to assess whether the difference is statistically significant. We note that the epiTOC2-calculated mitotic age is significantly accelerated in colorectal cancer compared to non-malignant mucosa. This accelerated mitotic age in colorectal cancer tissue relative to healthy mucosa reflects the increased number of cell divisions that cancer cells undergo.

# Violin plot of the age-adjusted mitotic age (annual divisions) per group,
# with the `mean_ci` summary overlaid and a t-test p-value label.
ggviolin(
  CRC_output,
  x = "Group",
  y = "IRepiTOC2",
  fill = "Group",
  palette = c("darkred", "seashell4"),
  add = "mean_ci",
  size = 1.2,
  width = 0.9
) +
  theme_minimal() +
  # Applied after theme_minimal() so these settings are not reset by it.
  theme(
    legend.position = "none",
    axis.title.x = element_text(size = 17),
    axis.text.x = element_text(size = 15),
    axis.title.y = element_text(size = 17),
    axis.text.y = element_text(size = 13)
  ) +
  labs(x = "Group", y = "Mitotic Age (Annual Divisions)") +
  stat_compare_means(
    method = "t.test",
    label = "p.format",
    label.x = 2.2,
    label.y = 50,
    size = 7
  )