This report investigates mitochondrial genetic variation, specifically the 488T>C mutation (m.2158T>C, MT-RNR2/M-SHLP2, c.11A>G, p.Lys4Arg), in individuals with Complex Regional Pain Syndrome (CRPS) compared to pain-free controls, using whole-genome sequencing data from the UK Biobank.
CRPS cases: UK Biobank participants of White ancestry who reported a CRPS diagnosis with symptom duration greater than 1 year (pain_time: “1–5 years” or “More than 5 years”).
Controls: UK Biobank participants of White ancestry with no reported CRPS diagnosis.
Controls were selected via 1:1 age- and sex-stratified sampling to match the CRPS case distribution, minimising confounding by demographic differences.
# Load CRPS cases (same filters as sampling.Rmd)
raw <- read.csv("data/CRPS_control.csv", stringsAsFactors = FALSE)
colnames(raw) <- c("eid", "birthyear", "sex", "crps", "pain_time")
raw <- raw %>% mutate(age = 2026 - birthyear)
crps_cases <- raw %>%
filter(crps == "Yes", sex != "", pain_time %in% c("1-5 years", "More than 5 years")) %>%
drop_na(age, sex) %>%
mutate(group = "CRPS")
# Load matched controls
sampled_ctrl <- read.csv("data/sampled_crps_control.csv", stringsAsFactors = FALSE) %>%
mutate(group = "Control")
# Combine
cohort <- bind_rows(
crps_cases %>% select(eid, age, sex, group),
sampled_ctrl %>% select(eid, age, sex, group)
)
# Build Table 1
table1 <- cohort %>%
group_by(group) %>%
summarise(
N = n(),
Age_mean = round(mean(age, na.rm = TRUE), 1),
Age_sd = round(sd(age, na.rm = TRUE), 1),
Female_n = sum(sex == "Female"),
Female_pct = round(Female_n / N * 100, 1)
) %>%
mutate(
`Age, mean (SD)` = paste0(Age_mean, " (", Age_sd, ")"),
`Female, n (%)` = paste0(Female_n, " (", Female_pct, "%)")
) %>%
select(Group = group, N, `Age, mean (SD)`, `Female, n (%)`)
table1 %>%
knitr::kable(caption = "Table 1. Cohort characteristics") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
| Group | N | Age, mean (SD) | Female, n (%) |
|---|---|---|---|
| CRPS | 1285 | 76.5 (7.3) | 787 (61.2%) |
| Control | 2917 | 76.4 (7.2) | 1786 (61.2%) |
First import data, then merge haplogroup information with genetic variant data. Extract m.2639 mutation status for all samples.
# Load clean haplogroup data
haplo_clean <- read_tsv("data/CRPS_summary_full/mutect2.haplogroup1.tab")
# Load VCF data
vcf <- read.vcfR("data/CRPS_summary_full/mutect2.03.merge.vcf")
## Scanning file to determine attributes.
## File attributes:
## meta lines: 47
## header_line: 48
## variant count: 3873
## column count: 2869
## Meta line 47 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 3873
## Character matrix gt cols: 2869
## skip: 0
## nrows: 3873
## row_num: 0
## Processed variant 1000Processed variant 2000Processed variant 3000Processed variant: 3873
## All variants processed
# Clean sample IDs
haplo_clean <- haplo_clean %>%
mutate(IID_clean = str_split_i(Run, "_", 1))
# Extract 488T>C mutation (more frequently referred to as m.2158T>C)
m488_pos <- "2158"
m488_index <- which(vcf@fix[, "POS"] == m488_pos)
gt <- extract.gt(vcf, element = "GT")
m488_gt <- gt[m488_index, ]
mutation_data <- data.frame(
IID_vcf = names(m488_gt),
m488_mutation = ifelse(grepl("1", m488_gt), 1, 0)
) %>%
mutate(IID_clean = str_split_i(IID_vcf, "_", 1))
# Merge data
final_data <- haplo_clean %>%
left_join(mutation_data, by = "IID_clean") %>%
filter(!is.na(m488_mutation))
Summary the merge data:
total_samples <- nrow(final_data)
mutated_samples <- sum(final_data$m488_mutation)
mutation_rate <- mutated_samples / total_samples * 100
cat("**Total samples analyzed:**", total_samples, "\n\n")
## **Total samples analyzed:** 2855
cat("**Samples with m.488 mutation:**", mutated_samples, "\n\n")
## **Samples with m.488 mutation:** 39
cat("**Overall mutation frequency:**", round(mutation_rate, 2), "%\n")
## **Overall mutation frequency:** 1.37 %
Calculated mutation frequencies across haplogroups
haplo_summary <- final_data %>%
group_by(haplogroup) %>%
summarise(
n_samples = n(),
n_mutated = sum(m488_mutation),
mutation_freq = n_mutated / n_samples * 100
) %>%
arrange(desc(mutation_freq))
# Display top haplogroups with mutations
haplo_summary_mutated <- haplo_summary %>%
filter(n_mutated > 0)
haplo_summary %>%
knitr::kable(
col.names = c("Haplogroup", "Total Samples", "Mutated Samples", "Mutation Frequency (%)"),
digits = 2
)
| Haplogroup | Total Samples | Mutated Samples | Mutation Frequency (%) |
|---|---|---|---|
| J | 297 | 39 | 13.13 |
| B | 1 | 0 | 0.00 |
| C | 2 | 0 | 0.00 |
| D | 1 | 0 | 0.00 |
| F | 1 | 0 | 0.00 |
| H | 1241 | 0 | 0.00 |
| HV | 63 | 0 | 0.00 |
| I | 103 | 0 | 0.00 |
| K | 230 | 0 | 0.00 |
| L1 | 4 | 0 | 0.00 |
| L2 | 3 | 0 | 0.00 |
| L3 | 4 | 0 | 0.00 |
| M | 3 | 0 | 0.00 |
| N | 9 | 0 | 0.00 |
| R | 9 | 0 | 0.00 |
| T | 310 | 0 | 0.00 |
| U | 404 | 0 | 0.00 |
| V | 90 | 0 | 0.00 |
| W | 48 | 0 | 0.00 |
| X | 32 | 0 | 0.00 |
The m.488 mutation shows an high frequency (13.79%) within the J haplogroup.
This represents a significant enrichment pattern compared to other haplogroups.
Reference: https://academic.oup.com/innovateage/article/3/Supplement_1/S838/5618469?guestAccessKey=
First import data, then merge haplogroup information with genetic variant data. Extract m.2639 mutation status for all samples.
# Load clean haplogroup data
haplo_clean <- read_tsv(
"data/CRPS_control_summary/mutect2.haplogroup1.tab",
col_names = c("Run", "haplogroup")
)
# Load VCF data
vcf <- read.vcfR("data/CRPS_control_summary/mutect2.03.merge.vcf")
## Scanning file to determine attributes.
## File attributes:
## meta lines: 47
## header_line: 48
## variant count: 3784
## column count: 2910
## Meta line 47 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 3784
## Character matrix gt cols: 2910
## skip: 0
## nrows: 3784
## row_num: 0
## Processed variant 1000Processed variant 2000Processed variant 3000Processed variant: 3784
## All variants processed
# Clean sample IDs
haplo_clean <- haplo_clean %>%
mutate(IID_clean = str_split_i(Run, "_", 1))
# Extract 488T>C mutation (more frequently referred to as m.2158T>C)
m488_pos <- "2158"
m488_index <- which(vcf@fix[, "POS"] == m488_pos)
gt <- extract.gt(vcf, element = "GT")
m488_gt <- gt[m488_index, ]
mutation_data <- data.frame(
IID_vcf = names(m488_gt),
m488_mutation = ifelse(grepl("1", m488_gt), 1, 0)
) %>%
mutate(IID_clean = str_split_i(IID_vcf, "_", 1))
# Merge data
final_data_ctrl <- haplo_clean %>%
left_join(mutation_data, by = "IID_clean") %>%
filter(!is.na(m488_mutation))
Summary the merge data:
total_samples_ctrl <- nrow(final_data_ctrl)
mutated_samples_ctrl <- sum(final_data_ctrl$m488_mutation)
mutation_rate_ctrl <- mutated_samples_ctrl/ total_samples_ctrl * 100
cat("**Total samples analyzed:**", total_samples_ctrl, "\n\n")
## **Total samples analyzed:** 2900
cat("**Samples with m.2639 mutation:**", mutated_samples_ctrl, "\n\n")
## **Samples with m.2639 mutation:** 36
cat("**Overall mutation frequency:**", round(mutation_rate_ctrl, 2), "%\n")
## **Overall mutation frequency:** 1.24 %
Calculated mutation frequencies across haplogroups
haplo_summary_ctrl <- final_data_ctrl %>%
group_by(haplogroup) %>%
summarise(
n_samples = n(),
n_mutated = sum(m488_mutation),
mutation_freq = n_mutated / n_samples * 100
) %>%
arrange(desc(mutation_freq))
# Display top haplogroups with mutations
haplo_summary_mutated_ctrl <- haplo_summary_ctrl %>%
filter(n_mutated > 0)
haplo_summary_ctrl %>%
knitr::kable(
col.names = c("Haplogroup", "Total Samples", "Mutated Samples", "Mutation Frequency (%)"),
digits = 2
)
| Haplogroup | Total Samples | Mutated Samples | Mutation Frequency (%) |
|---|---|---|---|
| J | 301 | 36 | 11.96 |
| A | 3 | 0 | 0.00 |
| B | 1 | 0 | 0.00 |
| D | 2 | 0 | 0.00 |
| E | 1 | 0 | 0.00 |
| H | 1221 | 0 | 0.00 |
| HV | 58 | 0 | 0.00 |
| I | 94 | 0 | 0.00 |
| K | 268 | 0 | 0.00 |
| L1 | 1 | 0 | 0.00 |
| L2 | 2 | 0 | 0.00 |
| L3 | 5 | 0 | 0.00 |
| L4 | 1 | 0 | 0.00 |
| M | 8 | 0 | 0.00 |
| N | 11 | 0 | 0.00 |
| P | 1 | 0 | 0.00 |
| R | 6 | 0 | 0.00 |
| T | 307 | 0 | 0.00 |
| U | 395 | 0 | 0.00 |
| V | 110 | 0 | 0.00 |
| W | 65 | 0 | 0.00 |
| X | 38 | 0 | 0.00 |
| Y | 1 | 0 | 0.00 |
overall_main <- haplo_summary %>%
summarise(
n_samples = sum(n_samples),
n_mutated = sum(n_mutated)
) %>%
mutate(dataset = "CRPS")
overall_ctrl <- haplo_summary_ctrl %>%
summarise(
n_samples = sum(n_samples),
n_mutated = sum(n_mutated)
) %>%
mutate(dataset = "Control")
overall_table <- bind_rows(
haplo_summary %>% mutate(dataset = "CRPS"),
haplo_summary_ctrl %>% mutate(dataset = "Control")
) %>%
summarise(
n_samples = sum(n_samples),
n_mutated = sum(n_mutated),
.by = dataset
) %>%
mutate(
n_non_mutated = n_samples - n_mutated,
mutation_pct = n_mutated / n_samples * 100
) %>%
select(
dataset,
n_samples,
n_mutated,
n_non_mutated,
mutation_pct
)
overall_table %>%
knitr::kable(
col.names = c(
"Dataset",
"Total samples",
"Mutated (n)",
"Non-mutated (n)",
"Mutation rate (%)"
),
digits = 2
)
| Dataset | Total samples | Mutated (n) | Non-mutated (n) | Mutation rate (%) |
|---|---|---|---|---|
| CRPS | 2855 | 39 | 2816 | 1.37 |
| Control | 2900 | 36 | 2864 | 1.24 |
haplo_J_table <- bind_rows(
haplo_summary %>% mutate(dataset = "CRPS"),
haplo_summary_ctrl %>% mutate(dataset = "Control")
) %>%
filter(haplogroup == "J") %>%
mutate(
n_non_mutated = n_samples - n_mutated,
mutation_pct = n_mutated / n_samples * 100
) %>%
select(dataset, n_samples, n_mutated, n_non_mutated, mutation_pct)
haplo_J_table %>%
knitr::kable(
col.names = c(
"Dataset",
"Total J samples",
"Mutated (n)",
"Non-mutated (n)",
"Mutation rate (%)"
),
digits = 2
)
| Dataset | Total J samples | Mutated (n) | Non-mutated (n) | Mutation rate (%) |
|---|---|---|---|---|
| CRPS | 297 | 39 | 258 | 13.13 |
| Control | 301 | 36 | 265 | 11.96 |
contingency_J <- matrix(
c(39, 258, # CRPS
36, 265), # Dolorisk/Control
nrow = 2,
byrow = TRUE
)
rownames(contingency_J) <- c("CRPS", "Control")
colnames(contingency_J) <- c("Mutated", "Non_mutated")
fisher.test(contingency_J)
##
## Fisher's Exact Test for Count Data
##
## data: contingency_J
## p-value = 0.7118
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.6656925 1.8634559
## sample estimates:
## odds ratio
## 1.112527
# 计算 haplogroup 分布比例
haplo_dist_main <- final_data %>%
count(haplogroup, name = "n_main") %>%
mutate(pct_main = n_main / sum(n_main) * 100)
haplo_dist_ctrl <- final_data_ctrl %>%
count(haplogroup, name = "n_ctrl") %>%
mutate(pct_ctrl = n_ctrl / sum(n_ctrl) * 100)
# 合并
haplo_compare <- haplo_dist_main %>%
full_join(haplo_dist_ctrl, by = "haplogroup") %>%
mutate(
across(
c(n_main, pct_main, n_ctrl, pct_ctrl),
~ replace_na(., 0)
)
) %>%
arrange(desc(pct_main))
# 输出比较表
haplo_compare %>%
knitr::kable(
col.names = c("Haplogroup",
"CRPS (n)", "CRPS (%)",
"Control (n)", "Control (%)"),
digits = 2
)
| Haplogroup | CRPS (n) | CRPS (%) | Control (n) | Control (%) |
|---|---|---|---|---|
| H | 1241 | 43.47 | 1221 | 42.10 |
| U | 404 | 14.15 | 395 | 13.62 |
| T | 310 | 10.86 | 307 | 10.59 |
| J | 297 | 10.40 | 301 | 10.38 |
| K | 230 | 8.06 | 268 | 9.24 |
| I | 103 | 3.61 | 94 | 3.24 |
| V | 90 | 3.15 | 110 | 3.79 |
| HV | 63 | 2.21 | 58 | 2.00 |
| W | 48 | 1.68 | 65 | 2.24 |
| X | 32 | 1.12 | 38 | 1.31 |
| N | 9 | 0.32 | 11 | 0.38 |
| R | 9 | 0.32 | 6 | 0.21 |
| L1 | 4 | 0.14 | 1 | 0.03 |
| L3 | 4 | 0.14 | 5 | 0.17 |
| L2 | 3 | 0.11 | 2 | 0.07 |
| M | 3 | 0.11 | 8 | 0.28 |
| C | 2 | 0.07 | 0 | 0.00 |
| B | 1 | 0.04 | 1 | 0.03 |
| D | 1 | 0.04 | 2 | 0.07 |
| F | 1 | 0.04 | 0 | 0.00 |
| A | 0 | 0.00 | 3 | 0.10 |
| E | 0 | 0.00 | 1 | 0.03 |
| L4 | 0 | 0.00 | 1 | 0.03 |
| P | 0 | 0.00 | 1 | 0.03 |
| Y | 0 | 0.00 | 1 | 0.03 |
haplo_long <- haplo_compare %>%
select(haplogroup, pct_main, pct_ctrl) %>%
pivot_longer(cols = starts_with("pct"),
names_to = "dataset", values_to = "percentage") %>%
mutate(dataset = recode(dataset,
pct_main = "CRPS",
pct_ctrl = "Control"))
p = ggplot(haplo_long, aes(x = reorder(haplogroup, -percentage),
y = percentage,
fill = dataset)) +
geom_col(position = "dodge") +
coord_flip() +
labs(x = "Haplogroup", y = "Sample Proportion (%)",
title = "Comparison of Haplogroup Distribution") +
theme_minimal()
print(p)
ggsave(
filename = "haplogroup_distribution.png",
plot = p,
width = 8,
height = 6,
dpi = 300
)