CRPS Mutation Analysis Report

Introduction

This report investigates mitochondrial genetic variation, specifically the 488T>C mutation (m.2158T>C, MT-RNR2/M-SHLP2, c.11A>G, p.Lys4Arg), in individuals with Complex Regional Pain Syndrome (CRPS) compared to pain-free controls, using whole-genome sequencing data from the UK Biobank.

Study Cohorts

CRPS cases: UK Biobank participants of White ancestry who reported a CRPS diagnosis with symptom duration greater than 1 year (pain_time: “1–5 years” or “More than 5 years”).

Controls: UK Biobank participants of White ancestry with no reported CRPS diagnosis.

Controls were selected via 1:1 age- and sex-stratified sampling to match the CRPS case distribution, minimising confounding by demographic differences.

Table 1. Cohort Characterisation

# Load CRPS cases (same filters as sampling.Rmd)
raw <- read.csv("data/CRPS_control.csv", stringsAsFactors = FALSE)
colnames(raw) <- c("eid", "birthyear", "sex", "crps", "pain_time")
raw <- raw %>% mutate(age = 2026 - birthyear)

crps_cases <- raw %>%
  filter(crps == "Yes", sex != "", pain_time %in% c("1-5 years", "More than 5 years")) %>%
  drop_na(age, sex) %>%
  mutate(group = "CRPS")

# Load matched controls
sampled_ctrl <- read.csv("data/sampled_crps_control.csv", stringsAsFactors = FALSE) %>%
  mutate(group = "Control")

# Combine
cohort <- bind_rows(
  crps_cases %>% select(eid, age, sex, group),
  sampled_ctrl %>% select(eid, age, sex, group)
)

# Build Table 1
table1 <- cohort %>%
  group_by(group) %>%
  summarise(
    N = n(),
    Age_mean = round(mean(age, na.rm = TRUE), 1),
    Age_sd   = round(sd(age,  na.rm = TRUE), 1),
    Female_n = sum(sex == "Female"),
    Female_pct = round(Female_n / N * 100, 1)
  ) %>%
  mutate(
    `Age, mean (SD)` = paste0(Age_mean, " (", Age_sd, ")"),
    `Female, n (%)`  = paste0(Female_n, " (", Female_pct, "%)")
  ) %>%
  select(Group = group, N, `Age, mean (SD)`, `Female, n (%)`)

table1 %>%
  knitr::kable(caption = "Table 1. Cohort characteristics") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)

Table 1. Cohort characteristics
Group	N	Age, mean (SD)	Female, n (%)
CRPS	1285	76.5 (7.3)	787 (61.2%)
Control	2917	76.4 (7.2)	1786 (61.2%)

UKBiobank

Data processing

First import data, then merge haplogroup information with genetic variant data. Extract m.2639 mutation status for all samples.

# Load clean haplogroup data
haplo_clean <- read_tsv("data/CRPS_summary_full/mutect2.haplogroup1.tab")

# Load VCF data
vcf <- read.vcfR("data/CRPS_summary_full/mutect2.03.merge.vcf")

## Scanning file to determine attributes.
## File attributes:
##   meta lines: 47
##   header_line: 48
##   variant count: 3873
##   column count: 2869
## Meta line 47 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 3873
##   Character matrix gt cols: 2869
##   skip: 0
##   nrows: 3873
##   row_num: 0
## Processed variant 1000Processed variant 2000Processed variant 3000Processed variant: 3873
## All variants processed

# Clean sample IDs
haplo_clean <- haplo_clean %>%
  mutate(IID_clean = str_split_i(Run, "_", 1))

# Extract 488T>C mutation (more frequently referred to as m.2158T>C)
m488_pos <- "2158"
m488_index <- which(vcf@fix[, "POS"] == m488_pos)
gt <- extract.gt(vcf, element = "GT")
m488_gt <- gt[m488_index, ]

mutation_data <- data.frame(
  IID_vcf = names(m488_gt),
  m488_mutation = ifelse(grepl("1", m488_gt), 1, 0)
) %>%
  mutate(IID_clean = str_split_i(IID_vcf, "_", 1))

# Merge data
final_data <- haplo_clean %>%
  left_join(mutation_data, by = "IID_clean") %>%
  filter(!is.na(m488_mutation))

Summary the merge data:

total_samples <- nrow(final_data)
mutated_samples <- sum(final_data$m488_mutation)
mutation_rate <- mutated_samples / total_samples * 100

cat("**Total samples analyzed:**", total_samples, "\n\n")

## **Total samples analyzed:** 2855

cat("**Samples with m.488 mutation:**", mutated_samples, "\n\n")

## **Samples with m.488 mutation:** 39

cat("**Overall mutation frequency:**", round(mutation_rate, 2), "%\n")

## **Overall mutation frequency:** 1.37 %

Calculated mutation frequencies across haplogroups

haplo_summary <- final_data %>%
  group_by(haplogroup) %>%
  summarise(
    n_samples = n(),
    n_mutated = sum(m488_mutation),
    mutation_freq = n_mutated / n_samples * 100
  ) %>%
  arrange(desc(mutation_freq))

# Display top haplogroups with mutations
haplo_summary_mutated <- haplo_summary %>%
  filter(n_mutated > 0)



haplo_summary %>%
  knitr::kable(
    col.names = c("Haplogroup", "Total Samples", "Mutated Samples", "Mutation Frequency (%)"),
    digits = 2
  )

Haplogroup	Total Samples	Mutated Samples	Mutation Frequency (%)
J	297	39	13.13
B	1	0	0.00
C	2	0	0.00
D	1	0	0.00
F	1	0	0.00
H	1241	0	0.00
HV	63	0	0.00
I	103	0	0.00
K	230	0	0.00
L1	4	0	0.00
L2	3	0	0.00
L3	4	0	0.00
M	3	0	0.00
N	9	0	0.00
R	9	0	0.00
T	310	0	0.00
U	404	0	0.00
V	90	0	0.00
W	48	0	0.00
X	32	0	0.00

Specific Enrichment in J Haplogroup

The m.488 mutation shows an high frequency (13.79%) within the J haplogroup.

This represents a significant enrichment pattern compared to other haplogroups.

Reference: https://academic.oup.com/innovateage/article/3/Supplement_1/S838/5618469?guestAccessKey=

Control

Data processing

First import data, then merge haplogroup information with genetic variant data. Extract m.2639 mutation status for all samples.

# Load clean haplogroup data

haplo_clean <- read_tsv(
  "data/CRPS_control_summary/mutect2.haplogroup1.tab",
  col_names = c("Run", "haplogroup")
)

# Load VCF data
vcf <- read.vcfR("data/CRPS_control_summary/mutect2.03.merge.vcf")

## Scanning file to determine attributes.
## File attributes:
##   meta lines: 47
##   header_line: 48
##   variant count: 3784
##   column count: 2910
## Meta line 47 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 3784
##   Character matrix gt cols: 2910
##   skip: 0
##   nrows: 3784
##   row_num: 0
## Processed variant 1000Processed variant 2000Processed variant 3000Processed variant: 3784
## All variants processed

# Clean sample IDs
haplo_clean <- haplo_clean %>%
  mutate(IID_clean = str_split_i(Run, "_", 1))

# Extract 488T>C mutation (more frequently referred to as m.2158T>C)
m488_pos <- "2158"
m488_index <- which(vcf@fix[, "POS"] == m488_pos)
gt <- extract.gt(vcf, element = "GT")
m488_gt <- gt[m488_index, ]

mutation_data <- data.frame(
  IID_vcf = names(m488_gt),
  m488_mutation = ifelse(grepl("1", m488_gt), 1, 0)
) %>%
  mutate(IID_clean = str_split_i(IID_vcf, "_", 1))

# Merge data
final_data_ctrl <- haplo_clean %>%
  left_join(mutation_data, by = "IID_clean") %>%
  filter(!is.na(m488_mutation))

Summary the merge data:

total_samples_ctrl <- nrow(final_data_ctrl)
mutated_samples_ctrl <- sum(final_data_ctrl$m488_mutation)
mutation_rate_ctrl <- mutated_samples_ctrl/ total_samples_ctrl * 100

cat("**Total samples analyzed:**", total_samples_ctrl, "\n\n")

## **Total samples analyzed:** 2900

cat("**Samples with m.2639 mutation:**", mutated_samples_ctrl, "\n\n")

## **Samples with m.2639 mutation:** 36

cat("**Overall mutation frequency:**", round(mutation_rate_ctrl, 2), "%\n")

## **Overall mutation frequency:** 1.24 %

Calculated mutation frequencies across haplogroups

haplo_summary_ctrl <- final_data_ctrl %>%
  group_by(haplogroup) %>%
  summarise(
    n_samples = n(),
    n_mutated = sum(m488_mutation),
    mutation_freq = n_mutated / n_samples * 100
  ) %>%
  arrange(desc(mutation_freq))

# Display top haplogroups with mutations
haplo_summary_mutated_ctrl <- haplo_summary_ctrl %>%
  filter(n_mutated > 0)



haplo_summary_ctrl %>%
  knitr::kable(
    col.names = c("Haplogroup", "Total Samples", "Mutated Samples", "Mutation Frequency (%)"),
    digits = 2
  )

Haplogroup	Total Samples	Mutated Samples	Mutation Frequency (%)
J	301	36	11.96
A	3	0	0.00
B	1	0	0.00
D	2	0	0.00
E	1	0	0.00
H	1221	0	0.00
HV	58	0	0.00
I	94	0	0.00
K	268	0	0.00
L1	1	0	0.00
L2	2	0	0.00
L3	5	0	0.00
L4	1	0	0.00
M	8	0	0.00
N	11	0	0.00
P	1	0	0.00
R	6	0	0.00
T	307	0	0.00
U	395	0	0.00
V	110	0	0.00
W	65	0	0.00
X	38	0	0.00
Y	1	0	0.00

Comparison of mutation

overall_main <- haplo_summary %>%
  summarise(
    n_samples = sum(n_samples),
    n_mutated = sum(n_mutated)
  ) %>%
  mutate(dataset = "CRPS")


overall_ctrl <- haplo_summary_ctrl %>%
  summarise(
    n_samples = sum(n_samples),
    n_mutated = sum(n_mutated)
  ) %>%
  mutate(dataset = "Control")

overall_table <- bind_rows(
  haplo_summary  %>% mutate(dataset = "CRPS"),

  haplo_summary_ctrl  %>% mutate(dataset = "Control")
) %>%
  summarise(
    n_samples = sum(n_samples),
    n_mutated = sum(n_mutated),
    .by = dataset
  ) %>%
  mutate(
    n_non_mutated = n_samples - n_mutated,
    mutation_pct = n_mutated / n_samples * 100
  ) %>%
  select(
    dataset,
    n_samples,
    n_mutated,
    n_non_mutated,
    mutation_pct
  )

overall_table %>%
  knitr::kable(
    col.names = c(
      "Dataset",
      "Total samples",
      "Mutated (n)",
      "Non-mutated (n)",
      "Mutation rate (%)"
    ),
    digits = 2
  )

Dataset	Total samples	Mutated (n)	Non-mutated (n)	Mutation rate (%)
CRPS	2855	39	2816	1.37
Control	2900	36	2864	1.24

haplo_J_table <- bind_rows(
  haplo_summary %>% mutate(dataset = "CRPS"),

  haplo_summary_ctrl %>% mutate(dataset = "Control")
) %>%
  filter(haplogroup == "J") %>%
  mutate(
    n_non_mutated = n_samples - n_mutated,
    mutation_pct = n_mutated / n_samples * 100
  ) %>%
  select(dataset, n_samples, n_mutated, n_non_mutated, mutation_pct)

haplo_J_table %>%
  knitr::kable(
    col.names = c(
      "Dataset",
      "Total J samples",
      "Mutated (n)",
      "Non-mutated (n)",
      "Mutation rate (%)"
    ),
    digits = 2
  )

Dataset	Total J samples	Mutated (n)	Non-mutated (n)	Mutation rate (%)
CRPS	297	39	258	13.13
Control	301	36	265	11.96

Test

contingency_J <- matrix(
  c(39, 258,   # CRPS
    36, 265),  # Dolorisk/Control
  nrow = 2,
  byrow = TRUE
)
rownames(contingency_J) <- c("CRPS", "Control")
colnames(contingency_J) <- c("Mutated", "Non_mutated")
fisher.test(contingency_J)

## 
##  Fisher's Exact Test for Count Data
## 
## data:  contingency_J
## p-value = 0.7118
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.6656925 1.8634559
## sample estimates:
## odds ratio 
##   1.112527

Comparison of Haplogroup frequency

Haplogroup distribution

# 计算 haplogroup 分布比例
haplo_dist_main <- final_data %>%
  count(haplogroup, name = "n_main") %>%
  mutate(pct_main = n_main / sum(n_main) * 100)


haplo_dist_ctrl <- final_data_ctrl %>%
  count(haplogroup, name = "n_ctrl") %>%
  mutate(pct_ctrl = n_ctrl / sum(n_ctrl) * 100)

# 合并

haplo_compare <- haplo_dist_main %>%
  full_join(haplo_dist_ctrl, by = "haplogroup") %>%
  mutate(
    across(
      c(n_main, pct_main,  n_ctrl, pct_ctrl),
      ~ replace_na(., 0)
    )
  ) %>%
  arrange(desc(pct_main))


# 输出比较表
haplo_compare %>%
  knitr::kable(
    col.names = c("Haplogroup", 
                  "CRPS (n)", "CRPS (%)",
                 
                  "Control (n)", "Control (%)"),
    digits = 2
  )

Haplogroup	CRPS (n)	CRPS (%)	Control (n)	Control (%)
H	1241	43.47	1221	42.10
U	404	14.15	395	13.62
T	310	10.86	307	10.59
J	297	10.40	301	10.38
K	230	8.06	268	9.24
I	103	3.61	94	3.24
V	90	3.15	110	3.79
HV	63	2.21	58	2.00
W	48	1.68	65	2.24
X	32	1.12	38	1.31
N	9	0.32	11	0.38
R	9	0.32	6	0.21
L1	4	0.14	1	0.03
L3	4	0.14	5	0.17
L2	3	0.11	2	0.07
M	3	0.11	8	0.28
C	2	0.07	0	0.00
B	1	0.04	1	0.03
D	1	0.04	2	0.07
F	1	0.04	0	0.00
A	0	0.00	3	0.10
E	0	0.00	1	0.03
L4	0	0.00	1	0.03
P	0	0.00	1	0.03
Y	0	0.00	1	0.03

haplo_long <- haplo_compare %>%
  select(haplogroup, pct_main, pct_ctrl) %>%
  pivot_longer(cols = starts_with("pct"),
               names_to = "dataset", values_to = "percentage") %>%
  mutate(dataset = recode(dataset,
                          pct_main = "CRPS",
                          
                          pct_ctrl = "Control"))

p = ggplot(haplo_long, aes(x = reorder(haplogroup, -percentage),
                       y = percentage,
                       fill = dataset)) +
  geom_col(position = "dodge") +
  coord_flip() +
  labs(x = "Haplogroup", y = "Sample Proportion (%)",
       title = "Comparison of Haplogroup Distribution") +
  theme_minimal()

print(p)

ggsave(
  filename = "haplogroup_distribution.png",
  plot = p,
  width = 8,
  height = 6,
  dpi = 300
)

CRPS Mutation Analysis Report

Hening Cui

April 20, 2026

Introduction

Study Cohorts

Table 1. Cohort Characterisation

UKBiobank

Data processing

Specific Enrichment in J Haplogroup

Control

Data processing

Comparison of mutation

Test

Comparison of Haplogroup frequency

Haplogroup distribution