Introduction

This report investigates mitochondrial genetic variation, specifically the 488T>C mutation (m.2158T>C, MT-RNR2/M-SHLP2, c.11A>G, p.Lys4Arg), in individuals with Complex Regional Pain Syndrome (CRPS) compared to pain-free controls, using whole-genome sequencing data from the UK Biobank.

Study Cohorts

CRPS cases: UK Biobank participants of White ancestry who reported a CRPS diagnosis with symptom duration greater than 1 year (pain_time: “1–5 years” or “More than 5 years”).

Controls: UK Biobank participants of White ancestry with no reported CRPS diagnosis.

Controls were selected via 1:1 age- and sex-stratified sampling to match the CRPS case distribution, minimising confounding by demographic differences.

Table 1. Cohort Characterisation

# Load CRPS cases (same filters as sampling.Rmd)
raw <- read.csv("data/CRPS_control.csv", stringsAsFactors = FALSE)
colnames(raw) <- c("eid", "birthyear", "sex", "crps", "pain_time")
raw <- raw %>% mutate(age = 2026 - birthyear)

crps_cases <- raw %>%
  filter(crps == "Yes", sex != "", pain_time %in% c("1-5 years", "More than 5 years")) %>%
  drop_na(age, sex) %>%
  mutate(group = "CRPS")

# Load matched controls
sampled_ctrl <- read.csv("data/sampled_crps_control.csv", stringsAsFactors = FALSE) %>%
  mutate(group = "Control")

# Combine
cohort <- bind_rows(
  crps_cases %>% select(eid, age, sex, group),
  sampled_ctrl %>% select(eid, age, sex, group)
)

# Build Table 1
table1 <- cohort %>%
  group_by(group) %>%
  summarise(
    N = n(),
    Age_mean = round(mean(age, na.rm = TRUE), 1),
    Age_sd   = round(sd(age,  na.rm = TRUE), 1),
    Female_n = sum(sex == "Female"),
    Female_pct = round(Female_n / N * 100, 1)
  ) %>%
  mutate(
    `Age, mean (SD)` = paste0(Age_mean, " (", Age_sd, ")"),
    `Female, n (%)`  = paste0(Female_n, " (", Female_pct, "%)")
  ) %>%
  select(Group = group, N, `Age, mean (SD)`, `Female, n (%)`)

table1 %>%
  knitr::kable(caption = "Table 1. Cohort characteristics") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
Table 1. Cohort characteristics
Group N Age, mean (SD) Female, n (%)
CRPS 1285 76.5 (7.3) 787 (61.2%)
Control 2917 76.4 (7.2) 1786 (61.2%)

UKBiobank

Data processing

First import data, then merge haplogroup information with genetic variant data. Extract m.2639 mutation status for all samples.

# Load clean haplogroup data
haplo_clean <- read_tsv("data/CRPS_summary_full/mutect2.haplogroup1.tab")

# Load VCF data
vcf <- read.vcfR("data/CRPS_summary_full/mutect2.03.merge.vcf")
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 47
##   header_line: 48
##   variant count: 3873
##   column count: 2869
## Meta line 47 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 3873
##   Character matrix gt cols: 2869
##   skip: 0
##   nrows: 3873
##   row_num: 0
## Processed variant 1000Processed variant 2000Processed variant 3000Processed variant: 3873
## All variants processed
# Clean sample IDs
haplo_clean <- haplo_clean %>%
  mutate(IID_clean = str_split_i(Run, "_", 1))

# Extract 488T>C mutation (more frequently referred to as m.2158T>C)
m488_pos <- "2158"
m488_index <- which(vcf@fix[, "POS"] == m488_pos)
gt <- extract.gt(vcf, element = "GT")
m488_gt <- gt[m488_index, ]

mutation_data <- data.frame(
  IID_vcf = names(m488_gt),
  m488_mutation = ifelse(grepl("1", m488_gt), 1, 0)
) %>%
  mutate(IID_clean = str_split_i(IID_vcf, "_", 1))

# Merge data
final_data <- haplo_clean %>%
  left_join(mutation_data, by = "IID_clean") %>%
  filter(!is.na(m488_mutation))

Summary the merge data:

total_samples <- nrow(final_data)
mutated_samples <- sum(final_data$m488_mutation)
mutation_rate <- mutated_samples / total_samples * 100

cat("**Total samples analyzed:**", total_samples, "\n\n")
## **Total samples analyzed:** 2855
cat("**Samples with m.488 mutation:**", mutated_samples, "\n\n") 
## **Samples with m.488 mutation:** 39
cat("**Overall mutation frequency:**", round(mutation_rate, 2), "%\n")
## **Overall mutation frequency:** 1.37 %

Calculated mutation frequencies across haplogroups

haplo_summary <- final_data %>%
  group_by(haplogroup) %>%
  summarise(
    n_samples = n(),
    n_mutated = sum(m488_mutation),
    mutation_freq = n_mutated / n_samples * 100
  ) %>%
  arrange(desc(mutation_freq))

# Display top haplogroups with mutations
haplo_summary_mutated <- haplo_summary %>%
  filter(n_mutated > 0)



haplo_summary %>%
  knitr::kable(
    col.names = c("Haplogroup", "Total Samples", "Mutated Samples", "Mutation Frequency (%)"),
    digits = 2
  )
Haplogroup Total Samples Mutated Samples Mutation Frequency (%)
J 297 39 13.13
B 1 0 0.00
C 2 0 0.00
D 1 0 0.00
F 1 0 0.00
H 1241 0 0.00
HV 63 0 0.00
I 103 0 0.00
K 230 0 0.00
L1 4 0 0.00
L2 3 0 0.00
L3 4 0 0.00
M 3 0 0.00
N 9 0 0.00
R 9 0 0.00
T 310 0 0.00
U 404 0 0.00
V 90 0 0.00
W 48 0 0.00
X 32 0 0.00

Specific Enrichment in J Haplogroup

The m.488 mutation shows an high frequency (13.79%) within the J haplogroup.

This represents a significant enrichment pattern compared to other haplogroups.

Reference: https://academic.oup.com/innovateage/article/3/Supplement_1/S838/5618469?guestAccessKey=

Control

Data processing

First import data, then merge haplogroup information with genetic variant data. Extract m.2639 mutation status for all samples.

# Load clean haplogroup data

haplo_clean <- read_tsv(
  "data/CRPS_control_summary/mutect2.haplogroup1.tab",
  col_names = c("Run", "haplogroup")
)

# Load VCF data
vcf <- read.vcfR("data/CRPS_control_summary/mutect2.03.merge.vcf")
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 47
##   header_line: 48
##   variant count: 3784
##   column count: 2910
## Meta line 47 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 3784
##   Character matrix gt cols: 2910
##   skip: 0
##   nrows: 3784
##   row_num: 0
## Processed variant 1000Processed variant 2000Processed variant 3000Processed variant: 3784
## All variants processed
# Clean sample IDs
haplo_clean <- haplo_clean %>%
  mutate(IID_clean = str_split_i(Run, "_", 1))

# Extract 488T>C mutation (more frequently referred to as m.2158T>C)
m488_pos <- "2158"
m488_index <- which(vcf@fix[, "POS"] == m488_pos)
gt <- extract.gt(vcf, element = "GT")
m488_gt <- gt[m488_index, ]

mutation_data <- data.frame(
  IID_vcf = names(m488_gt),
  m488_mutation = ifelse(grepl("1", m488_gt), 1, 0)
) %>%
  mutate(IID_clean = str_split_i(IID_vcf, "_", 1))

# Merge data
final_data_ctrl <- haplo_clean %>%
  left_join(mutation_data, by = "IID_clean") %>%
  filter(!is.na(m488_mutation))

Summary the merge data:

total_samples_ctrl <- nrow(final_data_ctrl)
mutated_samples_ctrl <- sum(final_data_ctrl$m488_mutation)
mutation_rate_ctrl <- mutated_samples_ctrl/ total_samples_ctrl * 100

cat("**Total samples analyzed:**", total_samples_ctrl, "\n\n")
## **Total samples analyzed:** 2900
cat("**Samples with m.2639 mutation:**", mutated_samples_ctrl, "\n\n") 
## **Samples with m.2639 mutation:** 36
cat("**Overall mutation frequency:**", round(mutation_rate_ctrl, 2), "%\n")
## **Overall mutation frequency:** 1.24 %

Calculated mutation frequencies across haplogroups

haplo_summary_ctrl <- final_data_ctrl %>%
  group_by(haplogroup) %>%
  summarise(
    n_samples = n(),
    n_mutated = sum(m488_mutation),
    mutation_freq = n_mutated / n_samples * 100
  ) %>%
  arrange(desc(mutation_freq))

# Display top haplogroups with mutations
haplo_summary_mutated_ctrl <- haplo_summary_ctrl %>%
  filter(n_mutated > 0)



haplo_summary_ctrl %>%
  knitr::kable(
    col.names = c("Haplogroup", "Total Samples", "Mutated Samples", "Mutation Frequency (%)"),
    digits = 2
  )
Haplogroup Total Samples Mutated Samples Mutation Frequency (%)
J 301 36 11.96
A 3 0 0.00
B 1 0 0.00
D 2 0 0.00
E 1 0 0.00
H 1221 0 0.00
HV 58 0 0.00
I 94 0 0.00
K 268 0 0.00
L1 1 0 0.00
L2 2 0 0.00
L3 5 0 0.00
L4 1 0 0.00
M 8 0 0.00
N 11 0 0.00
P 1 0 0.00
R 6 0 0.00
T 307 0 0.00
U 395 0 0.00
V 110 0 0.00
W 65 0 0.00
X 38 0 0.00
Y 1 0 0.00

Comparison of mutation

overall_main <- haplo_summary %>%
  summarise(
    n_samples = sum(n_samples),
    n_mutated = sum(n_mutated)
  ) %>%
  mutate(dataset = "CRPS")


overall_ctrl <- haplo_summary_ctrl %>%
  summarise(
    n_samples = sum(n_samples),
    n_mutated = sum(n_mutated)
  ) %>%
  mutate(dataset = "Control")
overall_table <- bind_rows(
  haplo_summary  %>% mutate(dataset = "CRPS"),

  haplo_summary_ctrl  %>% mutate(dataset = "Control")
) %>%
  summarise(
    n_samples = sum(n_samples),
    n_mutated = sum(n_mutated),
    .by = dataset
  ) %>%
  mutate(
    n_non_mutated = n_samples - n_mutated,
    mutation_pct = n_mutated / n_samples * 100
  ) %>%
  select(
    dataset,
    n_samples,
    n_mutated,
    n_non_mutated,
    mutation_pct
  )
overall_table %>%
  knitr::kable(
    col.names = c(
      "Dataset",
      "Total samples",
      "Mutated (n)",
      "Non-mutated (n)",
      "Mutation rate (%)"
    ),
    digits = 2
  )
Dataset Total samples Mutated (n) Non-mutated (n) Mutation rate (%)
CRPS 2855 39 2816 1.37
Control 2900 36 2864 1.24
haplo_J_table <- bind_rows(
  haplo_summary %>% mutate(dataset = "CRPS"),

  haplo_summary_ctrl %>% mutate(dataset = "Control")
) %>%
  filter(haplogroup == "J") %>%
  mutate(
    n_non_mutated = n_samples - n_mutated,
    mutation_pct = n_mutated / n_samples * 100
  ) %>%
  select(dataset, n_samples, n_mutated, n_non_mutated, mutation_pct)
haplo_J_table %>%
  knitr::kable(
    col.names = c(
      "Dataset",
      "Total J samples",
      "Mutated (n)",
      "Non-mutated (n)",
      "Mutation rate (%)"
    ),
    digits = 2
  )
Dataset Total J samples Mutated (n) Non-mutated (n) Mutation rate (%)
CRPS 297 39 258 13.13
Control 301 36 265 11.96

Test

contingency_J <- matrix(
  c(39, 258,   # CRPS
    36, 265),  # Dolorisk/Control
  nrow = 2,
  byrow = TRUE
)
rownames(contingency_J) <- c("CRPS", "Control")
colnames(contingency_J) <- c("Mutated", "Non_mutated")
fisher.test(contingency_J)
## 
##  Fisher's Exact Test for Count Data
## 
## data:  contingency_J
## p-value = 0.7118
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.6656925 1.8634559
## sample estimates:
## odds ratio 
##   1.112527

Comparison of Haplogroup frequency

Haplogroup distribution

# 计算 haplogroup 分布比例
haplo_dist_main <- final_data %>%
  count(haplogroup, name = "n_main") %>%
  mutate(pct_main = n_main / sum(n_main) * 100)


haplo_dist_ctrl <- final_data_ctrl %>%
  count(haplogroup, name = "n_ctrl") %>%
  mutate(pct_ctrl = n_ctrl / sum(n_ctrl) * 100)

# 合并

haplo_compare <- haplo_dist_main %>%
  full_join(haplo_dist_ctrl, by = "haplogroup") %>%
  mutate(
    across(
      c(n_main, pct_main,  n_ctrl, pct_ctrl),
      ~ replace_na(., 0)
    )
  ) %>%
  arrange(desc(pct_main))


# 输出比较表
haplo_compare %>%
  knitr::kable(
    col.names = c("Haplogroup", 
                  "CRPS (n)", "CRPS (%)",
                 
                  "Control (n)", "Control (%)"),
    digits = 2
  )
Haplogroup CRPS (n) CRPS (%) Control (n) Control (%)
H 1241 43.47 1221 42.10
U 404 14.15 395 13.62
T 310 10.86 307 10.59
J 297 10.40 301 10.38
K 230 8.06 268 9.24
I 103 3.61 94 3.24
V 90 3.15 110 3.79
HV 63 2.21 58 2.00
W 48 1.68 65 2.24
X 32 1.12 38 1.31
N 9 0.32 11 0.38
R 9 0.32 6 0.21
L1 4 0.14 1 0.03
L3 4 0.14 5 0.17
L2 3 0.11 2 0.07
M 3 0.11 8 0.28
C 2 0.07 0 0.00
B 1 0.04 1 0.03
D 1 0.04 2 0.07
F 1 0.04 0 0.00
A 0 0.00 3 0.10
E 0 0.00 1 0.03
L4 0 0.00 1 0.03
P 0 0.00 1 0.03
Y 0 0.00 1 0.03
haplo_long <- haplo_compare %>%
  select(haplogroup, pct_main, pct_ctrl) %>%
  pivot_longer(cols = starts_with("pct"),
               names_to = "dataset", values_to = "percentage") %>%
  mutate(dataset = recode(dataset,
                          pct_main = "CRPS",
                          
                          pct_ctrl = "Control"))

p = ggplot(haplo_long, aes(x = reorder(haplogroup, -percentage),
                       y = percentage,
                       fill = dataset)) +
  geom_col(position = "dodge") +
  coord_flip() +
  labs(x = "Haplogroup", y = "Sample Proportion (%)",
       title = "Comparison of Haplogroup Distribution") +
  theme_minimal()

print(p)

ggsave(
  filename = "haplogroup_distribution.png",
  plot = p,
  width = 8,
  height = 6,
  dpi = 300
)