snps_af_142 <- read_xlsx("PGS SNCA TRIP12 adjustment 26_4_22.xlsx", sheet = 1, range = "A1:N143") %>%
dplyr::select("snp", "gene", "log2_afc") %>%
dplyr::rename(rsID=snp) %>%
mutate(log2_afc = ifelse(gene %in% c("SNCA", "TRIP12"), -log2_afc, log2_afc)) # reverse the effect for the 2 genes
genotypes <- read_tsv(file = "/Users/rsultana/work/amppd/vcfs/amppd_149SNPs_genotypes.txt") %>%
mutate(
rsID = if_else(rsID == ".", "rs28691231", rsID),
GT = gsub("|", "/", GT, fixed = T)
) %>%
mutate(GENOTYPE = if_else((GT == "0/0") |
(rsID == "rs112957100" & GT == "0/1") |
(rsID == "rs75214905" & GT == "0/2"),
0, # all homozygous ref alleles and the the hets that have not been designed against contribute 0 to the score
if_else(GT == '1/1',
2, # the homozygous alt alleles contribute 2
1) # the heterozygous alt alleles contribute 1
)
)
scores_all_cohorts <- genotypes %>%
inner_join(snps_af_142, by = "rsID") %>%
filter(!is.na(GENOTYPE)) %>%
mutate(term = GENOTYPE * log2_afc) %>%
drop_na(term) %>%
group_by(participant_id) %>%
summarize(score = sum(term))
amppd_metadata <- read_tsv("/Users/rsultana/work/AMP-PD_MGRB_joint_call/amp-pd-nogenetic_for_MGRBJoint_calling.txt")
amppd_metadata %>% mutate(prefix=substr(PID_MRGB, 1, 2)) %>% count(prefix)
amppd_metadata %>% filter(is.na(Dataset)) %>% mutate(prefix=substr(PID_MRGB, 1, 2)) %>% count(prefix)
amppd_metadata <- amppd_metadata %>%
rename(participant_id = PID_MRGB, phenotype = Phenotype) %>%
mutate(phenotype = if_else(!is.na(phenotype), phenotype, "Other")) %>%
mutate(prefix = substr(participant_id, 1, 2)) %>%
mutate(Dataset = case_when(!is.na(Dataset) ~ Dataset,
prefix=="HB" ~ "HBS",
prefix=="SU" ~ "SURE-PD3",
prefix=="SY" ~ "STEADY-PD3")
) %>%
select(-prefix)
amppd_metadata %>% count(Dataset)
scores1 <- scores_all_cohorts %>%
inner_join(amppd_metadata, by = "participant_id") %>%
filter(Dataset == "BioFind")
scores1 %>% group_by(phenotype) %>% summarize(median_score = median(score))
n <- dim(scores1)[1]
mycols <- pal_nejm("default", alpha = 1)(2)
my_grob_test <- grobTree(textGrob("test = wilcox.test",
x = 0.8,
y = 0.95, hjust = 0, gp = gpar(col = "black", fontsize = 9, fontface = "italic")
))
ggplot(scores1, aes(x = phenotype, y = score, fill = phenotype)) +
geom_boxplot(fill = mycols, alpha = 0.3) +
geom_jitter(color = "black", width = 0.2, size = 0.7, alpha = 0.9, show.legend = FALSE) +
geom_signif(comparisons = list(c("Control", "Case")), map_signif_level = TRUE, test = wilcox.test) +
annotation_custom(my_grob_test) +
ggtitle("BioFind: Predicted PGS for GBA expression from 142 SNPs",
subtitle = paste("(", n , "participants)")
)
scores1 <- scores_all_cohorts %>%
inner_join(amppd_metadata, by = "participant_id") %>%
filter((Dataset == 'HBS') |(!is.na(PID_HBS)))
scores1 %>% group_by(phenotype) %>% summarize(median_score = median(score))
n <- dim(scores1)[1]
mycols <- pal_nejm("default", alpha = 1)(3)
my_grob_test <- grobTree(textGrob("test = wilcox.test",
x = 0.8,
y = 0.95, hjust = 0, gp = gpar(col = "black", fontsize = 9, fontface = "italic")
))
ggplot(scores1, aes(x = phenotype, y = score, fill = phenotype)) +
geom_boxplot(fill = mycols, alpha = 0.3) +
geom_jitter(color = "black", width = 0.2, size = 0.7, alpha = 0.9, show.legend = FALSE) +
geom_signif(comparisons = list(c("Control", "Case")), map_signif_level = TRUE, test = wilcox.test) +
annotation_custom(my_grob_test) +
ggtitle("HBS: Predicted PGS for GBA expression from 142 SNPs",
subtitle = paste("(", n , "participants)")
)
scores1 <- scores_all_cohorts %>%
inner_join(amppd_metadata, by = "participant_id") %>%
filter((Dataset == 'PDBP'))
scores1 %>% group_by(phenotype) %>% summarize(median_score = median(score))
n <- dim(scores1)[1]
mycols <- pal_nejm("default", alpha = 1)(2)
my_grob_test <- grobTree(textGrob("test = wilcox.test",
x = 0.8,
y = 0.95, hjust = 0, gp = gpar(col = "black", fontsize = 9, fontface = "italic")
))
ggplot(scores1, aes(x = phenotype, y = score, fill = phenotype)) +
geom_boxplot(fill = mycols, alpha = 0.3) +
geom_jitter(color = "black", width = 0.2, size = 0.7, alpha = 0.9, show.legend = FALSE) +
geom_signif(comparisons = list(c("Control", "Case")), map_signif_level = TRUE, test = wilcox.test) +
annotation_custom(my_grob_test) +
ggtitle("PDBP: Predicted PGS for GBA expression from 142 SNPs",
subtitle = paste("(", n , "participants)")
)
ggplot(scores1, aes(sample = score, color = phenotype)) +
facet_grid(cols = vars(phenotype)) +
stat_qq(show.legend = FALSE) +
geom_qq_line(color="black") +
theme_bw()
ggplot(scores1, aes(x = score, color = phenotype)) +
facet_grid(cols = vars(phenotype)) +
geom_density(show.legend = FALSE) +
stat_function(
fun = dnorm,
args = with(scores1, c(mean = mean(score), sd = sd(score))),
color = "black"
) +
scale_x_continuous("polygenic score")
scores1 <- scores_all_cohorts %>%
inner_join(amppd_metadata, by = "participant_id") %>%
filter(Dataset == 'PPMI')
scores1 %>% group_by(phenotype) %>% summarize(median_score = median(score))
n <- dim(scores1)[1]
mycols <- pal_nejm("default", alpha = 1)(2)
my_grob_test <- grobTree(textGrob("test = wilcox.test",
x = 0.8,
y = 0.95, hjust = 0, gp = gpar(col = "black", fontsize = 9, fontface = "italic")
))
ggplot(scores1, aes(x = phenotype, y = score, fill = phenotype)) +
geom_boxplot(fill = mycols, alpha = 0.3) +
geom_jitter(color = "black", width = 0.2, size = 0.7, alpha = 0.9, show.legend = FALSE) +
geom_signif(comparisons = list(c("Control", "Case")), map_signif_level = TRUE, test = wilcox.test) +
annotation_custom(my_grob_test) +
ggtitle("PPMI: Predicted PGS for GBA expression from 142 SNPs",
subtitle = paste("(", n , "participants)")
)
scores1 <- scores_all_cohorts %>%
inner_join(amppd_metadata, by = "participant_id") %>%
filter((Dataset == 'STEADY-PD3') |(!is.na(PID_STEADYPD3)))
scores1 %>% group_by(phenotype) %>% summarize(median_score = median(score))
n <- dim(scores1)[1]
mycols <- pal_nejm("default", alpha = 1)(3)
my_grob_test <- grobTree(textGrob("test = wilcox.test",
x = 0.8,
y = 0.95, hjust = 0, gp = gpar(col = "black", fontsize = 9, fontface = "italic")
))
ggplot(scores1, aes(x = phenotype, y = score, fill = phenotype)) +
geom_boxplot(fill = mycols, alpha = 0.3) +
geom_jitter(color = "black", width = 0.2, size = 0.7, alpha = 0.9, show.legend = FALSE) +
geom_signif(comparisons = list(c("Control", "Case")), map_signif_level = TRUE, test = wilcox.test) +
annotation_custom(my_grob_test) +
ggtitle("STEADY-PD3: Predicted PGS for GBA expression from 142 SNPs",
subtitle = paste("(", n , "participants)")
)
scores1 <- scores_all_cohorts %>%
inner_join(amppd_metadata, by = "participant_id") %>%
filter((Dataset == 'SURE-PD3') |(!is.na(PID_SUREPD3)))
scores1 %>% group_by(phenotype) %>% summarize(median_score = median(score))
n <- dim(scores1)[1]
mycols <- pal_nejm("default", alpha = 1)(2)
my_grob_test <- grobTree(textGrob("test = wilcox.test",
x = 0.8,
y = 0.95, hjust = 0, gp = gpar(col = "black", fontsize = 9, fontface = "italic")
))
ggplot(scores1, aes(x = phenotype, y = score, fill = phenotype)) +
geom_boxplot(fill = mycols, alpha = 0.3) +
geom_jitter(color = "black", width = 0.2, size = 0.7, alpha = 0.9, show.legend = FALSE) +
geom_signif(comparisons = list(c("Control", "Case")), map_signif_level = TRUE, test = wilcox.test) +
annotation_custom(my_grob_test) +
ggtitle("SURE-PD3: Predicted PGS for GBA expression from 142 SNPs",
subtitle = paste("(", n , "participants)")
)
genotypes <- read_tsv("48samples_149SNPs_genotypes_by_rsID.txt") %>%
mutate(
rsID = if_else(rsID == ".", "rs28691231", rsID),
GT = gsub("|", "/", GT, fixed = T)
) %>%
mutate(GENOTYPE = if_else((GT == "0/0" |
(rsID == "rs112957100" & GT == "0/1") |
(rsID == "rs75214905" & GT == "0/2")),
0,
if_else(GT == "1/1",
2,
1
)
))
dict <- read_tsv("48samples_dict.tsv")
reversed_effect_snps <- snps_af_142 %>% filter(gene %in% c("SNCA", "TRIP12")) %>% select(rsID) %>% unlist()
snps_af_96 <- read_tsv(file = "~/work/parkons_disease_a-cooper/100_SNPs/R/96Snps.log2Afc.tsv") %>%
rename(rsID = snp_id)
scores1 <- genotypes %>%
inner_join(snps_af_96, by = "rsID") %>%
filter(!is.na(GENOTYPE)) %>%
mutate(term = GENOTYPE * log2_afc) %>%
drop_na(term) %>%
group_by(sample) %>%
summarize(score = sum(term)) %>%
inner_join(dict, by = "sample")
scores2 <- genotypes %>%
inner_join(snps_af_142, by = "rsID") %>%
filter(!is.na(GENOTYPE)) %>%
mutate(term = GENOTYPE * log2_afc) %>%
drop_na(term) %>%
group_by(sample) %>%
summarize(score = sum(term)) %>%
inner_join(dict, by = "sample")
all_scores <- (scores1 %>% mutate(dataset = "96SNPs")) %>%
rbind(scores2 %>% mutate(dataset = "142SNPs"))
mycols <- pal_nejm("default", alpha = 1)(2)
ggplot(all_scores, aes(dataset, score, fill = dataset)) +
geom_boxplot(fill = mycols, alpha = 0.3) +
geom_label(aes(label = letter), position = "jitter", alpha = 0.75) +
# geom_jitter(color="black", width = 0.2, size=0.7, alpha=0.9, show.legend = FALSE) +
ggtitle("Predicted PGS for GBA expression from 142 SNPs",
subtitle = "Scores for weighted additive model with 142 and 96 markers"
)
scores12 <- scores1 %>%
inner_join(scores2, by=c("sample", "letter")) %>%
rename(score96 = score.x, score142 = score.y) %>%
relocate("sample", "letter")
write_csv(scores12, file="score96_vs_score142.csv")
scores12
Download this table: score96_vs_score142.csv
gg <- scores12 %>%
ggplot( aes(score96, score142, text=letter, key=sample, shape=as.factor(3))) +
xlim(-20, 20) +
ylim(-20, 20) +
geom_point(colour=mycols[1], alpha=0.5, size=4) +
theme(legend.position='none')
ggplotly(gg, tooltip=c('sample', 'letter', 'score96', 'score142'))