#Reading in the files
prognostic_everything <- read.table("prognostic_everything_2.profile", header = T)
prognostic_0.05 <- read.table("prognostic_0.05.profile", header = T)
prognostic_10_5 <- read.table("prognostic_10_5.profile", header = T)
prognostic_10_8 <- read.table("prognostic_10_8.profile", header = T)
# Read in principal components capturing population stratificaton
pcs <- read.table("merged_pca_Shellfish.evecs")
# Accompanying .fam file
fam <- read.table("merged_pca.fam")
pcs <- t(pcs) %>%
as.data.frame() %>%
mutate(IID = fam$V2) %>%
relocate(IID)
colnames(pcs)[2:11] <- paste0("PC",seq(1,10))
# Exponentiate score to transform back to an OR scale, add disease status
prognostic_everything <- prognostic_everything %>%
mutate(exp_SCORE = exp(SCORE),
status = as.factor(ifelse(grepl("TRIAD", IID), 1, 0))) %>%
left_join(pcs)
## Joining, by = "IID"
table(prognostic_everything$status) ## 54 cases, 998 controls
##
## 0 1
## 998 54
prognostic_0.05 <- prognostic_0.05 %>%
mutate(exp_SCORE = exp(SCORE),
status = as.factor(ifelse(grepl("TRIAD", IID), 1, 0))) %>%
left_join(pcs)
## Joining, by = "IID"
prognostic_10_5 <- prognostic_10_5 %>%
mutate(exp_SCORE = exp(SCORE),
status = as.factor(ifelse(grepl("TRIAD", IID), 1, 0))) %>%
left_join(pcs)
## Joining, by = "IID"
prognostic_10_8 <- prognostic_10_8 %>%
mutate(exp_SCORE = exp(SCORE),
status = as.factor(ifelse(grepl("TRIAD", IID), 1, 0))) %>%
left_join(pcs)
## Joining, by = "IID"
# PRS Prognostic Distribution
p.prog_everything <- ggplot(prognostic_everything, aes(x = exp_SCORE, group = status)) +
geom_density(aes(fill = status), alpha = 0.7) +
scale_fill_manual(name = "Disease status", values = c("grey70","grey20")) +
theme_bw() +
labs(x = "IBD Prognostic PRS", y = "Density")
p.prog_0.05 <- ggplot(prognostic_0.05, aes(x = exp_SCORE, group = status)) +
geom_density(aes(fill = status), alpha = 0.7) +
scale_fill_manual(name = "Disease status", values = c("grey70","grey20")) +
theme_bw() +
labs(x = "IBD Prognostic PRS", y = "Density")
p.prog_10_5 <- ggplot(prognostic_10_5, aes(x = exp_SCORE, group = status)) +
geom_density(aes(fill = status), alpha = 0.7) +
scale_fill_manual(name = "Disease status", values = c("grey70","grey20")) +
theme_bw() +
labs(x = "IBD Prognostic PRS", y = "Density")
p.prog_10_8 <- ggplot(prognostic_10_8, aes(x = exp_SCORE, group = status)) +
geom_density(aes(fill = status), alpha = 0.7) +
scale_fill_manual(name = "Disease status", values = c("grey70","grey20")) +
theme_bw() +
labs(x = "IBD Prognostic PRS", y = "Density")
ggarrange(p.prog_everything, p.prog_0.05, p.prog_10_5, p.prog_10_8)
p.prognostic.box_everything <- ggplot(prognostic_everything, aes(y = exp_SCORE, x = status)) +
geom_boxplot(aes(fill = status), alpha = 0.7, outlier.shape = NA) +
stat_boxplot(geom = "errorbar") +
geom_jitter(width = 0.2, size = 2, pch = 21, stroke = 0.7, fill = "white") +
scale_fill_manual(name = "Disease status", values = c("grey70","grey20")) +
theme_bw() +
theme(legend.position = "none") +
labs(y = "IBD Prognostic PRS", x = "Disease status")
p.prognostic.box_0.05 <- ggplot(prognostic_0.05, aes(y = exp_SCORE, x = status)) +
geom_boxplot(aes(fill = status), alpha = 0.7, outlier.shape = NA) +
stat_boxplot(geom = "errorbar") +
geom_jitter(width = 0.2, size = 2, pch = 21, stroke = 0.7, fill = "white") +
scale_fill_manual(name = "Disease status", values = c("grey70","grey20")) +
theme_bw() +
theme(legend.position = "none") +
labs(y = "IBD Prognostic PRS", x = "Disease status")
p.prognostic.box_10_5 <- ggplot(prognostic_10_5, aes(y = exp_SCORE, x = status)) +
geom_boxplot(aes(fill = status), alpha = 0.7, outlier.shape = NA) +
stat_boxplot(geom = "errorbar") +
geom_jitter(width = 0.2, size = 2, pch = 21, stroke = 0.7, fill = "white") +
scale_fill_manual(name = "Disease status", values = c("grey70","grey20")) +
theme_bw() +
theme(legend.position = "none") +
labs(y = "IBD Prognostic PRS", x = "Disease status")
p.prognostic.box_10_8 <- ggplot(prognostic_10_8, aes(y = exp_SCORE, x = status)) +
geom_boxplot(aes(fill = status), alpha = 0.7, outlier.shape = NA) +
stat_boxplot(geom = "errorbar") +
geom_jitter(width = 0.2, size = 2, pch = 21, stroke = 0.7, fill = "white") +
scale_fill_manual(name = "Disease status", values = c("grey70","grey20")) +
theme_bw() +
theme(legend.position = "none") +
labs(y = "IBD Prognostic PRS", x = "Disease status")
ggarrange(p.prognostic.box_everything, p.prognostic.box_0.05, p.prognostic.box_10_5, p.prognostic.box_10_8)
## ==== T-test (is the mean PRS different between groups) ==== ##
prog.t_everything <- prognostic_everything %>% t_test(data = prognostic_everything, formula = exp_SCORE ~ status)
prog.t_everything ## p = 0.565
## # A tibble: 1 × 8
## .y. group1 group2 n1 n2 statistic df p
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 exp_SCORE 0 1 998 54 -0.578 59.3 0.565
prog.t_0.05 <- prognostic_0.05 %>% t_test(data = prognostic_0.05, formula = exp_SCORE ~ status)
prog.t_0.05 ## p = 0.453
## # A tibble: 1 × 8
## .y. group1 group2 n1 n2 statistic df p
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 exp_SCORE 0 1 998 54 0.755 59.9 0.453
prog.t_10_5 <- prognostic_10_5 %>% t_test(data = prognostic_10_5, formula = exp_SCORE ~ status)
prog.t_10_5 ## p = 0.549
## # A tibble: 1 × 8
## .y. group1 group2 n1 n2 statistic df p
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 exp_SCORE 0 1 998 54 0.602 61.4 0.549
prog.t_10_8 <- prognostic_10_8 %>% t_test(data = prognostic_10_8, formula = exp_SCORE ~ status)
prog.t_10_8 ## p = 0.663
## # A tibble: 1 × 8
## .y. group1 group2 n1 n2 statistic df p
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 exp_SCORE 0 1 998 54 0.437 60.7 0.663
## ==== Logistic regression (estimates how disease status varies based on PRS) ==== ##
stat.prog_everything <- glm(formula = status ~ exp_SCORE, family = binomial(link = "logit"), data = prognostic_everything)
summary(stat.prog_everything)
##
## Call:
## glm(formula = status ~ exp_SCORE, family = binomial(link = "logit"),
## data = prognostic_everything)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.3704 -0.3321 -0.3226 -0.3145 2.4983
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -854.6 1513.6 -0.565 0.572
## exp_SCORE 851.5 1513.1 0.563 0.574
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 425.88 on 1051 degrees of freedom
## Residual deviance: 425.57 on 1050 degrees of freedom
## AIC: 429.57
##
## Number of Fisher Scoring iterations: 5
coef(summary(stat.prog_everything))[,'Pr(>|z|)']
## (Intercept) exp_SCORE
## 0.5723085 0.5736190
#p = 0.574
stat.prog_0.05 <- glm(formula = status ~ exp_SCORE, family = binomial(link = "logit"), data = prognostic_0.05)
summary(stat.prog_0.05)
##
## Call:
## glm(formula = status ~ exp_SCORE, family = binomial(link = "logit"),
## data = prognostic_0.05)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.3717 -0.3342 -0.3238 -0.3114 2.5230
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 127.8 186.3 0.686 0.493
## exp_SCORE -132.5 188.7 -0.702 0.483
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 425.88 on 1051 degrees of freedom
## Residual deviance: 425.38 on 1050 degrees of freedom
## AIC: 429.38
##
## Number of Fisher Scoring iterations: 5
coef(summary(stat.prog_0.05))[,'Pr(>|z|)']
## (Intercept) exp_SCORE
## 0.4925264 0.4827215
#p = 0.483
stat.prog_10_5 <- glm(formula = status ~ exp_SCORE, family = binomial(link = "logit"), data = prognostic_10_5)
summary(stat.prog_10_5)
##
## Call:
## glm(formula = status ~ exp_SCORE, family = binomial(link = "logit"),
## data = prognostic_10_5)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.3330 -0.3317 -0.3306 -0.3126 2.4890
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.005 1.772 -1.132 0.258
## exp_SCORE -1.333 2.589 -0.515 0.607
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 425.88 on 1051 degrees of freedom
## Residual deviance: 425.61 on 1050 degrees of freedom
## AIC: 429.61
##
## Number of Fisher Scoring iterations: 5
coef(summary(stat.prog_10_5))[,'Pr(>|z|)']
## (Intercept) exp_SCORE
## 0.2577509 0.6065992
#p = 0.607
stat.prog_10_8 <- glm(formula = status ~ exp_SCORE, family = binomial(link = "logit"), data = prognostic_10_8)
summary(stat.prog_10_8)
##
## Call:
## glm(formula = status ~ exp_SCORE, family = binomial(link = "logit"),
## data = prognostic_10_8)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.3291 -0.3291 -0.3291 -0.3105 2.4724
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.4767 1.1386 -2.175 0.0296 *
## exp_SCORE -0.6857 1.7661 -0.388 0.6978
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 425.88 on 1051 degrees of freedom
## Residual deviance: 425.73 on 1050 degrees of freedom
## AIC: 429.73
##
## Number of Fisher Scoring iterations: 5
coef(summary(stat.prog_10_8))[,'Pr(>|z|)']
## (Intercept) exp_SCORE
## 0.02961642 0.69783285
#p = 0.6978
# Logistic Regression with PC inclusion
p.pcs <- ggplot(prognostic_everything, aes(x = PC1, y = PC2)) +
geom_point(aes(fill = status, shape = status), alpha = 0.7, size = 4) +
scale_shape_manual(name = "Disease status", values = c(21,24)) +
scale_fill_manual(name = "Disease status", values = c("grey90","grey20")) +
theme_bw()
p.PC1 <- ggplot(prognostic_everything, aes(y = PC1, x = status)) +
geom_boxplot(aes(fill = status), alpha = 0.7, outlier.shape = NA) +
stat_boxplot(geom = "errorbar") +
geom_jitter(width = 0.2, size = 3, pch = 21, stroke = 0.7, fill = "white") +
scale_fill_manual(name = "Disease status", values = c("grey70","grey20")) +
ggpubr::stat_compare_means(method = "wilcox.test", label = "p.format") +
theme_bw() +
theme(legend.position = "none") +
labs(y = "PC1", x = "Disease status")
gridExtra::grid.arrange(grobs = list(p.pcs,p.PC1), nrow = 1, widths = c(0.7,0.3))
stat.PCs.prog_everything <- glm(
formula = status ~ exp_SCORE + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10,
family = binomial(link = "logit"), data = prognostic_everything)
summary(stat.PCs.prog_everything)
##
## Call:
## glm(formula = status ~ exp_SCORE + PC1 + PC2 + PC3 + PC4 + PC5 +
## PC6 + PC7 + PC8 + PC9 + PC10, family = binomial(link = "logit"),
## data = prognostic_everything)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6661 -0.3392 -0.2697 -0.2093 2.8420
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -712.52 1613.86 -0.442 0.658850
## exp_SCORE 728.14 1612.98 0.451 0.651682
## PC1 1488.15 416.01 3.577 0.000347 ***
## PC2 -65.44 342.87 -0.191 0.848635
## PC3 104.77 185.61 0.564 0.572439
## PC4 90.33 72.67 1.243 0.213867
## PC5 54.72 59.07 0.926 0.354311
## PC6 -77.18 39.54 -1.952 0.050919 .
## PC7 17.68 53.48 0.331 0.740886
## PC8 -63.96 42.39 -1.509 0.131303
## PC9 35.70 36.27 0.984 0.324935
## PC10 91.98 31.84 2.888 0.003871 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 425.88 on 1051 degrees of freedom
## Residual deviance: 381.27 on 1040 degrees of freedom
## AIC: 405.27
##
## Number of Fisher Scoring iterations: 6
coef(summary(stat.PCs.prog_everything))[,'Pr(>|z|)']
## (Intercept) exp_SCORE PC1 PC2 PC3 PC4
## 0.6588495593 0.6516817100 0.0003473392 0.8486345878 0.5724387608 0.2138674181
## PC5 PC6 PC7 PC8 PC9 PC10
## 0.3543105323 0.0509191383 0.7408861863 0.1313031230 0.3249345492 0.0038714124
# p = 0.652
stat.PCs.prog_0.05 <- glm(
formula = status ~ exp_SCORE + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10,
family = binomial(link = "logit"), data = prognostic_0.05)
summary(stat.PCs.prog_0.05)
##
## Call:
## glm(formula = status ~ exp_SCORE + PC1 + PC2 + PC3 + PC4 + PC5 +
## PC6 + PC7 + PC8 + PC9 + PC10, family = binomial(link = "logit"),
## data = prognostic_0.05)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6680 -0.3432 -0.2671 -0.2105 2.7864
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 118.99 196.96 0.604 0.545748
## exp_SCORE -104.29 199.41 -0.523 0.600983
## PC1 1506.57 413.38 3.645 0.000268 ***
## PC2 -64.92 343.37 -0.189 0.850032
## PC3 109.68 185.76 0.590 0.554878
## PC4 92.24 72.78 1.267 0.205025
## PC5 48.89 58.99 0.829 0.407216
## PC6 -76.75 39.41 -1.948 0.051445 .
## PC7 17.55 53.45 0.328 0.742702
## PC8 -64.07 42.31 -1.514 0.129913
## PC9 33.03 36.40 0.907 0.364234
## PC10 89.78 31.75 2.827 0.004693 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 425.88 on 1051 degrees of freedom
## Residual deviance: 381.20 on 1040 degrees of freedom
## AIC: 405.2
##
## Number of Fisher Scoring iterations: 6
coef(summary(stat.PCs.prog_0.05))[,'Pr(>|z|)']
## (Intercept) exp_SCORE PC1 PC2 PC3 PC4
## 0.5457483160 0.6009826805 0.0002679013 0.8500323707 0.5548778486 0.2050249637
## PC5 PC6 PC7 PC8 PC9 PC10
## 0.4072157779 0.0514451441 0.7427016407 0.1299127101 0.3642341529 0.0046930511
# p = 0.601
stat.PCs.prog_10_5 <- glm(
formula = status ~ exp_SCORE + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10,
family = binomial(link = "logit"), data = prognostic_10_5)
summary(stat.PCs.prog_10_5)
##
## Call:
## glm(formula = status ~ exp_SCORE + PC1 + PC2 + PC3 + PC4 + PC5 +
## PC6 + PC7 + PC8 + PC9 + PC10, family = binomial(link = "logit"),
## data = prognostic_10_5)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6703 -0.3425 -0.2672 -0.2088 2.7979
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 16.832 5.198 3.238 0.001203 **
## exp_SCORE -1.218 2.739 -0.445 0.656479
## PC1 1500.748 412.586 3.637 0.000275 ***
## PC2 -64.131 343.356 -0.187 0.851835
## PC3 109.920 185.343 0.593 0.553138
## PC4 91.817 72.760 1.262 0.206980
## PC5 51.231 58.779 0.872 0.383435
## PC6 -77.625 39.463 -1.967 0.049178 *
## PC7 18.956 53.553 0.354 0.723363
## PC8 -63.071 42.367 -1.489 0.136572
## PC9 34.351 36.203 0.949 0.342696
## PC10 90.954 31.730 2.867 0.004150 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 425.88 on 1051 degrees of freedom
## Residual deviance: 381.27 on 1040 degrees of freedom
## AIC: 405.27
##
## Number of Fisher Scoring iterations: 6
coef(summary(stat.PCs.prog_10_5))[,'Pr(>|z|)']
## (Intercept) exp_SCORE PC1 PC2 PC3 PC4
## 0.0012029670 0.6564789898 0.0002753827 0.8518347636 0.5531380525 0.2069797293
## PC5 PC6 PC7 PC8 PC9 PC10
## 0.3834348122 0.0491780898 0.7233628700 0.1365719075 0.3426959532 0.0041500055
# p = 0.656
stat.PCs.prog_10_8 <- glm(
formula = status ~ exp_SCORE + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10,
family = binomial(link = "logit"), data = prognostic_10_8)
summary(stat.PCs.prog_10_8)
##
## Call:
## glm(formula = status ~ exp_SCORE + PC1 + PC2 + PC3 + PC4 + PC5 +
## PC6 + PC7 + PC8 + PC9 + PC10, family = binomial(link = "logit"),
## data = prognostic_10_8)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6738 -0.3427 -0.2676 -0.2085 2.8020
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 16.5593 5.0109 3.305 0.000951 ***
## exp_SCORE -0.8395 1.8751 -0.448 0.654342
## PC1 1502.4832 412.2607 3.644 0.000268 ***
## PC2 -65.0298 343.4095 -0.189 0.849807
## PC3 108.8145 185.3544 0.587 0.557162
## PC4 92.3153 72.7888 1.268 0.204704
## PC5 51.5643 58.7907 0.877 0.380442
## PC6 -77.8312 39.4883 -1.971 0.048725 *
## PC7 18.8945 53.5391 0.353 0.724156
## PC8 -63.3290 42.3540 -1.495 0.134854
## PC9 34.4987 36.2145 0.953 0.340782
## PC10 91.0421 31.7219 2.870 0.004105 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 425.88 on 1051 degrees of freedom
## Residual deviance: 381.26 on 1040 degrees of freedom
## AIC: 405.26
##
## Number of Fisher Scoring iterations: 6
coef(summary(stat.PCs.prog_10_8))[,'Pr(>|z|)']
## (Intercept) exp_SCORE PC1 PC2 PC3 PC4
## 0.0009509936 0.6543423897 0.0002679148 0.8498065200 0.5571620865 0.2047040948
## PC5 PC6 PC7 PC8 PC9 PC10
## 0.3804417306 0.0487245032 0.7241562128 0.1348542724 0.3407817557 0.0041046105
# p = 0.654
## Using ROC curves to calculate PRS sensitivity and specificity for IBD diagnosis
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
## GLM (as above)
stat.PCs.prog_everything <- glm(
formula = status ~ exp_SCORE + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10,
family = binomial(link = "logit"), data = prognostic_everything)
## Predict disease status from model
status.pred.prog_everything <- predict(stat.PCs.prog_everything, newdata = prognostic_everything, type = "response")
status.pred.prog_everything <- ifelse(status.pred.prog_everything > 0.20, 1, 0) ## IBD prediction >20%
pred.res <- data.frame(IID = prognostic_everything$IID,
status = prognostic_everything$status,
status_pred_everything = status.pred.prog_everything)
## Compare results
table(prognostic_everything$status, status.pred.prog_everything)
## status.pred.prog_everything
## 0 1
## 0 989 9
## 1 47 7
## Calculate AUC (area under the curve, ie. prediction accuracy)
par(mfrow=c(2,2))
roc.prog_everything <- roc(prognostic_everything$status, status.pred.prog_everything)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
roc.prog_everything #AUC 0.560
##
## Call:
## roc.default(response = prognostic_everything$status, predictor = status.pred.prog_everything)
##
## Data: status.pred.prog_everything in 998 controls (prognostic_everything$status 0) < 54 cases (prognostic_everything$status 1).
## Area under the curve: 0.5603
plot.roc(roc.prog_everything, col = "blue", print.auc = TRUE)
#p<0.05
stat.PCs.prog_0.05 <- glm(
formula = status ~ exp_SCORE + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10,
family = binomial(link = "logit"), data = prognostic_0.05)
status.pred.prog_0.05 <- predict(stat.PCs.prog_0.05, newdata = prognostic_0.05, type = "response")
status.pred.prog_0.05 <- ifelse(status.pred.prog_0.05 > 0.20, 1, 0) ## IBD prediction >20%
roc.prog_0.05 <- roc(prognostic_0.05$status, status.pred.prog_0.05)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
roc.prog_0.05 #AUC 0.561
##
## Call:
## roc.default(response = prognostic_0.05$status, predictor = status.pred.prog_0.05)
##
## Data: status.pred.prog_0.05 in 998 controls (prognostic_0.05$status 0) < 54 cases (prognostic_0.05$status 1).
## Area under the curve: 0.5613
plot.roc(roc.prog_0.05, col = "blue", print.auc = TRUE)
#p<1*10^5
stat.PCs.prog_10_5 <- glm(
formula = status ~ exp_SCORE + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10,
family = binomial(link = "logit"), data = prognostic_10_5)
status.pred.prog_10_5 <- predict(stat.PCs.prog_10_5, newdata = prognostic_10_5, type = "response")
status.pred.prog_10_5 <- ifelse(status.pred.prog_10_5 > 0.20, 1, 0) ## IBD prediction >20%
roc.prog_10_5 <- roc(prognostic_10_5$status, status.pred.prog_10_5)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
roc.prog_10_5 #AUC 0.561
##
## Call:
## roc.default(response = prognostic_10_5$status, predictor = status.pred.prog_10_5)
##
## Data: status.pred.prog_10_5 in 998 controls (prognostic_10_5$status 0) < 54 cases (prognostic_10_5$status 1).
## Area under the curve: 0.5608
plot.roc(roc.prog_10_5, col = "blue", print.auc = TRUE)
#p<1*10^8
stat.PCs.prog_10_8 <- glm(
formula = status ~ exp_SCORE + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10,
family = binomial(link = "logit"), data = prognostic_10_8)
status.pred.prog_10_8 <- predict(stat.PCs.prog_10_8, newdata = prognostic_10_8, type = "response")
status.pred.prog_10_8 <- ifelse(status.pred.prog_10_8 > 0.20, 1, 0) ## IBD prediction >20%
table(prognostic_10_8$status, status.pred.prog_10_8)
## status.pred.prog_10_8
## 0 1
## 0 990 8
## 1 47 7
roc.prog_10_8 <- roc(prognostic_10_8$status, status.pred.prog_10_8)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
roc.prog_10_8 #AUC 0.561
##
## Call:
## roc.default(response = prognostic_10_8$status, predictor = status.pred.prog_10_8)
##
## Data: status.pred.prog_10_8 in 998 controls (prognostic_10_8$status 0) < 54 cases (prognostic_10_8$status 1).
## Area under the curve: 0.5608
plot.roc(roc.prog_10_8, col = "blue", print.auc = TRUE)
clinical_data_prognostic <- read.csv("updated_clinical_data.csv")
merged_clinical_prognostic_initial <- merge(prognostic_everything, clinical_data_prognostic, by.x = "IID", by.y = "Patient_ID", all.x = TRUE, all.y = FALSE)
merged_clinical_prognostic <- subset(merged_clinical_prognostic_initial, grepl("TRIAD", merged_clinical_prognostic_initial$IID))
clinical_prognostic <- subset(merged_clinical_prognostic, !is.na(merged_clinical_prognostic$DOB))
prognostic_escalation <- subset(clinical_prognostic, !is.na(clinical_prognostic$Total_escalations))
escalataions_year <- prognostic_escalation$Total_escalations/(prognostic_escalation$FollowUp_Days/365)
escalations_df <- data.frame(prognostic_escalation$exp_SCORE, prognostic_escalation$Total_escalations, escalataions_year)
colnames(escalations_df) <- c("exp_SCORE","Escalations", "Escalation_rate")
escalation_binary <- ifelse(grepl("0", prognostic_escalation$Total_escalations), "No_Escalation", "Escalation")
escalation_binary_table <- data.frame(escalation_binary, prognostic_escalation$exp_SCORE)
box_escalation_prognostic_binary <- ggplot(escalation_binary_table, aes(x = escalation_binary, y = prognostic_escalation.exp_SCORE)) +
geom_boxplot(alpha = 0.7, outlier.shape = NA) +
stat_boxplot(geom = "errorbar") +
geom_jitter(width = 0.2, size = 2, pch = 21, stroke = 0.7, fill = "white") +
scale_fill_manual(name = "Escalations vs No escalations", values = c("grey70","grey20")) +
theme_bw() +
theme(legend.position = "none") +
labs(y = "Prognostic PRS", x = "Escalations vs No Escalations") +
ggpubr::stat_compare_means(method = "wilcox.test", label = "p.format")
density_prognostic_escalation <- ggplot(escalation_binary_table, aes(x = prognostic_escalation.exp_SCORE, group = escalation_binary)) +
geom_density(aes(fill = escalation_binary), alpha = 0.7) +
scale_fill_manual(name = "Escalation Status", values = c("grey70","grey20")) +
theme_bw() +
labs(x = "Prognostic PRS", y = "Density")
ggarrange(box_escalation_prognostic_binary, density_prognostic_escalation, ncol = 1, nrow = 2, labels = c("A", "B"))
prognostic_escalation_rate <- ggplot(escalations_df, aes(x=Escalation_rate , y=exp_SCORE)) + geom_point() + annotate("text", x = 2.1, y = 1.00032, label="p-value = 0.26") + annotate("text", x = 2.1, y = 1.0003, label="R^2 = 0.04172") + labs(y = "Prognostic PRS", x = "Escalation rate (Escalation per year of Follow Up)") + geom_smooth(method=lm, linetype="dashed", color="darkred", fill="blue")
prognostic_escalation_rate
## `geom_smooth()` using formula = 'y ~ x'
prognostic_surgeries <- subset(clinical_prognostic, !is.na(clinical_prognostic$Total_IBD_surgeries))
box_surgeries <- ggplot(prognostic_surgeries, aes(x = as.character(Total_IBD_surgeries), y = exp_SCORE)) +
geom_boxplot(alpha = 0.7, outlier.shape = NA) +
stat_boxplot(geom = "errorbar") +
geom_jitter(width = 0.2, size = 2, pch = 21, stroke = 0.7, fill = "white") +
theme_bw() +
theme(legend.position = "none") +
labs(y = "Prognostic PRS Score", x = "Surgery vs No Surgery") +
ggpubr::stat_compare_means(method = "wilcox.test", label = "p.format")
density_surgeries <- ggplot(prognostic_surgeries, aes(x = exp_SCORE, group = as.character(Total_IBD_surgeries))) +
geom_density(aes(fill = as.character(Total_IBD_surgeries)), alpha = 0.7) +
scale_fill_manual(name = "Surgery Status", values = c("grey70","grey20")) +
theme_bw() +
labs(x = "Prognostic PRS", y = "Density")
ggarrange(box_surgeries, density_surgeries, ncol = 1, nrow = 2, labels = c("A", "B"))
clinical_prognostic_initial_class <- clinical_prognostic %>%
mutate(Initial_treatment_Class = recode(Initial_treatment_Class, "Aninosalicylate" = "Aminosalicylate"))
ggplot(clinical_prognostic_initial_class, aes(x=Initial_treatment_Class, y=exp_SCORE)) + geom_jitter(width = 0.2, size = 2, pch = 21, stroke = 0.7, fill = "white") + geom_boxplot(alpha = 0.7, outlier.shape = NA) + ggtitle("Effect of Initial Treatment on Score") + labs(y = "PRS", x = "Initial Treatment")
filtered_initial_treatment <- clinical_prognostic_initial_class %>%
filter(!is.na(Initial_treatment_Class), !is.na(exp_SCORE))
ggplot(filtered_initial_treatment, aes(x = factor(ifelse(Initial_treatment_Class == "None", "None", "Other")), y = exp_SCORE)) +
geom_jitter(width = 0.2, size = 2, pch = 21, stroke = 0.7, fill = "white") +
geom_boxplot(fill = alpha("white", 0.5)) +
ggtitle("Initial treatment vs no treatment on Score") +
labs(y = "PRS", x = "Initial Treatment") +
scale_x_discrete(labels = c("None", "Treatment")) +
ggpubr::stat_compare_means(method = "wilcox.test", label = "p.format")
amino_steroid <- clinical_prognostic_initial_class %>%
filter(Initial_treatment_Class %in% c("Aminosalicylate", "Steroid"), !is.na(exp_SCORE))
ggplot(amino_steroid, aes(x = Initial_treatment_Class, y = exp_SCORE)) +
geom_jitter(width = 0.2, size = 2, pch = 21, stroke = 0.7, fill = "white") +
geom_boxplot(fill = alpha("white", 0.5)) +
ggtitle("Effect of Initial Treatment on Score") +
labs(y = "PRS", x = "Initial Treatment") +
ggpubr::stat_compare_means(method = "wilcox.test", label = "p.format")
ggplot(clinical_prognostic_initial_class, aes(x=Escalation1_Treatment_Class, y=exp_SCORE)) + geom_jitter(width = 0.2, size = 2, pch = 21, stroke = 0.7, fill = "white") + geom_boxplot(alpha = 0.7, outlier.shape = NA) + ggtitle("Effect of 1st Escalation Type on Score") + labs(y = "Prognostic PRS", x = "1st Escalation Treatment")
ggplot(clinical_prognostic_initial_class, aes(x = ifelse(is.na(Escalation1_Treatment_Class), "NA", "Other"), y = exp_SCORE)) +
geom_jitter(width = 0.2, size = 2, pch = 21, stroke = 0.7, fill = "white") +
geom_boxplot(alpha = 0.7, outlier.shape = NA) +
ggtitle("Effect of 1st Escalation Type on Score") +
labs(y = "Prognostic PRS", x = "1st Escalation Treatment") +
scale_x_discrete(labels = c("Other", "NA")) +
ggpubr::stat_compare_means(method = "wilcox.test", label = "p.format")
prognostic_endoscopy <- subset(clinical_prognostic, !is.na(clinical_prognostic$Endoscopy))
prognostic_endoscopy$Endoscopy[prognostic_endoscopy$Endoscopy == "Severe (CT)"] <- "Severe"
anova_prognostic_endoscopy <- aov(exp_SCORE ~ Endoscopy, data = prognostic_endoscopy)
summary(anova_prognostic_endoscopy)
## Df Sum Sq Mean Sq F value Pr(>F)
## Endoscopy 2 2.017e-08 1.008e-08 1.54 0.235
## Residuals 24 1.572e-07 6.548e-09
box_prognostic_endoscopy <- ggplot(prognostic_endoscopy, aes(x=Endoscopy, y=exp_SCORE)) + geom_boxplot() + labs(y = "Prognostic PRS", x = "Endoscopy Findings")
box_prognostic_endoscopy
prognostic_clinical_extent <- subset(clinical_prognostic, !is.na(clinical_prognostic$Extent))
box_update_extent <- ggplot(prognostic_clinical_extent, aes(x=Extent, y=exp_SCORE)) + geom_boxplot() + ggtitle("Effect of Disease Extent on Prognostic Score") + labs(y = "Prognostic PRS", x = "Disease Extent")
box_update_extent
anova_extent_update <- aov(exp_SCORE ~ Extent, data = prognostic_clinical_extent)
summary(anova_extent_update) # p < 0.32
## Df Sum Sq Mean Sq F value Pr(>F)
## Extent 4 3.266e-08 8.164e-09 1.234 0.323
## Residuals 24 1.588e-07 6.615e-09
prognostic_extensive_binary <- ifelse(grepl("Extensive", prognostic_clinical_extent$Extent), "Extensive", "Not_Extensive")
extensive_or_not_update <- data.frame(prognostic_extensive_binary, prognostic_clinical_extent$exp_SCORE)
ggplot(extensive_or_not_update, aes(x=prognostic_extensive_binary, y=prognostic_clinical_extent$exp_SCORE)) + geom_boxplot() + ggtitle("Extensive vs not extensive on Score") + ggpubr::stat_compare_means(method = "wilcox.test", label = "p.format") + theme_bw() +
theme(legend.position = "none") +
labs(y = "Prognostic PRS", x = "Extensive vs Not Extensive") + stat_boxplot(geom = "errorbar") +
geom_jitter(width = 0.2, size = 1.5, pch = 21, stroke = 0.7, fill = "white")