This a Prognostic Polygenic Risk Score based off Lee et al. (2017)
# Read in the File and remove patients without clinical information
prognostic_everything <- read.table("prognostic_everything_2.profile", header = T)
pcs <- read.table("merged_pca_Shellfish.evecs")
fam <- read.table("merged_pca.fam")
pcs <- t(pcs) %>%
as.data.frame() %>%
mutate(IID = fam$V2) %>%
relocate(IID)
prognostic_everything <- prognostic_everything %>%
mutate(exp_SCORE = exp(SCORE),
status = as.factor(ifelse(grepl("TRIAD", IID), 1, 0))) %>%
left_join(pcs)
## Joining with `by = join_by(IID)`
clinical_data_prognostic <- read.csv("update_clinical_data_corrected.csv")
merged_clinical_prognostic_initial <- merge(prognostic_everything, clinical_data_prognostic, by.x = "IID", by.y = "Patient_ID", all.x = TRUE, all.y = FALSE)
merged_clinical_prognostic <- subset(merged_clinical_prognostic_initial, grepl("TRIAD", merged_clinical_prognostic_initial$IID))
clinical_prognostic <- subset(merged_clinical_prognostic, !is.na(merged_clinical_prognostic$DOB))
# Adding extra information about CD vs UC from another file
extra_clinical <- read.csv("IBD_ClinicalMetadata_forTE.csv")
diagnosis_clinical <- data.frame(extra_clinical$Patient_ID, extra_clinical$Diagnosis)
colnames(diagnosis_clinical) <- c("IID", "Diagnosis")
clinical_prognostic <- merge(clinical_prognostic, diagnosis_clinical, by = "IID", all.x = TRUE)
clinical_prognostic_CD <- subset(clinical_prognostic, Diagnosis == 'CD')
Maximal Treatment is looking at what is the highest level of treatment that the individual has received, and whether that is associated with PRS
filtered_maximal <- subset(clinical_prognostic, Maximal_Treatment %in% c("Steroid", "Immunosuppressant", "Biologic", "Surgery"))
ggplot(filtered_maximal, aes(x = factor(Maximal_Treatment, levels = c("Steroid", "Immunosuppressant", "Biologic", "Surgery")), y = exp_SCORE, fill = factor(Maximal_Treatment, levels = c("Steroid", "Immunosuppressant", "Biologic", "Surgery")))) +
geom_boxplot() +
labs(x = "Maximal Treatment", y = "Prognostic PRS") +
geom_point(position = position_jitterdodge(), color = "black", size = 1) +
scale_color_npg() + guides(fill = "none") +
theme(axis.text.x = element_text(size = 12))
Within Maximal Treatments, I am looking at the effects of surgeries vs no surgeries on the effect of PRS
prognostic_surgeries <- subset(clinical_prognostic, !is.na(clinical_prognostic$Total_IBD_surgeries))
box_surgeries <- ggplot(prognostic_surgeries, aes(x = as.character(Total_IBD_surgeries), y = exp_SCORE, fill = as.character(Total_IBD_surgeries))) +
geom_boxplot(alpha = 0.7, outlier.shape = NA) +
stat_boxplot(geom = "errorbar") +
geom_jitter(width = 0.2, size = 1.5, pch = 21, stroke = 0.7, fill = "white") +
theme_bw() +
theme(legend.position = "none") +
labs(y = "Prognostic PRS Score", x = "Surgery vs No Surgery") +
scale_color_npg() +
ggpubr::stat_compare_means(method = "wilcox.test", label = "p.format")
density_surgeries <- ggplot(prognostic_surgeries, aes(x = exp_SCORE, group = as.character(Total_IBD_surgeries))) +
geom_density(aes(fill = as.character(Total_IBD_surgeries)), alpha = 0.7) +
scale_fill_manual(name = "Surgery Status", values = c("grey70", "grey20")) +
guides(fill = "none") +
theme_bw() +
labs(x = "Prognostic PRS", y = "Density")
ggarrange(box_surgeries, density_surgeries, ncol = 1, nrow = 2, labels = c("A", "B"), heights = c(1, 0.5))
prognostic_surgeries_glm <- glm(formula = Total_IBD_surgeries ~ exp_SCORE, family = binomial(link = "logit"), data = prognostic_surgeries)
surgeries_prognostic_pred <- predict(prognostic_surgeries_glm, newdata = prognostic_surgeries, type = "response")
status.pred_IBD_prognostic <- ifelse(surgeries_prognostic_pred > 0.2, 1, 0) ## Escalation > 20%
table(prognostic_surgeries$Total_IBD_surgeries, status.pred_IBD_prognostic)
## status.pred_IBD_prognostic
## 0 1
## 0 20 6
## 1 1 5
roc.score_prognostic <- roc(prognostic_surgeries$Total_IBD_surgeries, status.pred_IBD_prognostic)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot.roc(roc.score_prognostic, col = "blue", print.auc = TRUE, legacy.axes = TRUE)
subset_st_surg <- clinical_prognostic %>% filter(clinical_prognostic$Maximal_Treatment %in% c('Steroid', 'Surgery'))
ggplot(subset_st_surg, aes(x = Maximal_Treatment, y = as.numeric(as.character(exp_SCORE)), fill = Maximal_Treatment)) +
geom_boxplot(alpha = 0.7, outlier.shape = NA) +
stat_boxplot(geom = "errorbar") +
geom_jitter(width = 0.2, size = 2, pch = 21, stroke = 0.7, fill = "white") +
scale_color_npg() +
theme_bw() +
labs(y = "Prognostic PRS", x = "Maximal Treatment") +
ggpubr::stat_compare_means(method = "wilcox.test", label = "p.format")
subset_bio_surg <- clinical_prognostic %>% filter(clinical_prognostic$Maximal_Treatment %in% c('Surgery', 'Biologic'))
ggplot(subset_bio_surg, aes(x = Maximal_Treatment, y = as.numeric(as.character(exp_SCORE)), fill = Maximal_Treatment)) +
geom_boxplot(alpha = 0.7, outlier.shape = NA) +
stat_boxplot(geom = "errorbar") +
geom_jitter(width = 0.2, size = 2, pch = 21, stroke = 0.7, fill = "white") +
scale_color_npg() +
theme_bw() +
labs(y = "Prognostic PRS", x = "Maximal Treatment") +
ggpubr::stat_compare_means(method = "wilcox.test", label = "p.format")