READ ME…
DATA DESCRIPTION: We have data from participants of the National Cancer Institute (NCI). This data set includes some demographic, ancestral, clinical, and biomarker information on each participant within the study. Here we perform regression, correlation, and paired t-tests.
PURPOSE: The goal of this study is to identify differences in clinical biomarker levels in a normal patient population by self-reported race and ancestry. (DONE) We also want to see the influence of diabetes status, BMI, and prostate cancer on biomarker levels based on self-reported race and ancestry.
If not all packages can be installed, use this:
# Install ComplexHeatmap from Bioconductor only when it is missing, so that
# re-running the script does not attempt a re-install on every run.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
if (!requireNamespace("ComplexHeatmap", quietly = TRUE)) {
  BiocManager::install("ComplexHeatmap")
}
## Bioconductor version 3.19 (BiocManager 1.30.25), R 4.4.2 (2024-10-31)
## Warning: package(s) not installed when version(s) same as or greater than current; use
## `force = TRUE` to re-install: 'ComplexHeatmap'
load packages
library(plyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(readxl)
library("rstatix")
##
## Attaching package: 'rstatix'
## The following objects are masked from 'package:plyr':
##
## desc, mutate
## The following object is masked from 'package:stats':
##
## filter
library("ggplot2")
library("dplyr")
library("ggpubr")
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
##
## mutate
library("dunn.test")
library("ARTool")
library(openxlsx)
library("rio")
library(corrplot)
## corrplot 0.95 loaded
library(pheatmap)
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.20.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
##
## The new InteractiveComplexHeatmap package can directly export static
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
## ! pheatmap() has been masked by ComplexHeatmap::pheatmap(). Most of the arguments
## in the original pheatmap() are identically supported in the new function. You
## can still use the original function by explicitly calling pheatmap::pheatmap().
##
## Attaching package: 'ComplexHeatmap'
## The following object is masked from 'package:pheatmap':
##
## pheatmap
library(circlize)
## ========================================
## circlize version 0.4.16
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
##
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
## in R. Bioinformatics 2014.
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(circlize))
## ========================================
set working directory
# Make the project data folder the working directory for this session.
# NOTE(review): the read calls visible in this script use absolute paths, so
# this may be unnecessary — confirm before removing.
setwd("/Users/ewamble/Desktop/Tsion/NCI_MD/Data/patient_pop_data")
load data sets
# Read the original study workbook and the West African ancestry workbook
# into tibbles.
NCI_OG_Study <- read_excel(
  path = "/Users/ewamble/Desktop/Tsion/NCI_MD/Data/patient_pop_data/NCI_MD_OG_data.xlsx"
)
NCI_Ancestry <- read_excel(
  path = "/Users/ewamble/Desktop/Tsion/NCI_MD/Data/patient_pop_data/WestAfrAncestry_NCIMDcontrols.xlsx"
)
outlier handling from serum proteomics paper (values beyond the 1st/99th percentiles are clamped to those percentiles)
# Winsorize analyte intensities: within the selected group, values below the
# `tail` quantile or above the `1 - tail` quantile are clamped to that quantile
# (no rows are dropped). The result is written as a tab-delimited text file.
# NOTE: the former `rm(list = ls())` was removed; it deleted every object in
# the workspace, including the data sets loaded earlier in this script.
PROJECT_DIR <- "/Users/ewamble/Desktop/Tsion/NCI_MD/Data/patient_pop_data" # replace this line with your local path
infile <- file.path(PROJECT_DIR, "NCI_MD_OG_data.xlsx")
# NOTE(review): presumably a character matrix because the sheet has text
# columns, which is why values are converted with as.numeric() below — confirm.
data <- as.matrix(read_excel(infile, sheet = 1))
tail <- 0.01
group <- "controls_only" #c("cases_only","controls_only","all")
# Analyte measurements are taken to start at column 21 and run to the last column.
stopifnot(ncol(data) >= 21)
analyte_cols <- 21:ncol(data)
n_analyte <- length(analyte_cols)

# Pick the rows to treat; an unrecognised `group` is an error instead of
# leaving `select` undefined.
case_flag <- data[, "case"]
select <- switch(group,
  cases_only = case_flag == 1,
  controls_only = case_flag == 0,
  all = rep(TRUE, nrow(data)),
  stop("unknown group: ", group, call. = FALSE)
)
# Rows with a missing case flag cannot be assigned to a group; exclude them so
# the subscripted assignment below does not fail on NA indices.
select <- !is.na(select) & select

for (i_analyte in seq_len(n_analyte)) {
  col <- analyte_cols[i_analyte]
  intensity <- as.numeric(data[select, col])
  # na.rm = TRUE: a single missing measurement must not abort the whole run;
  # missing values stay missing after clamping.
  lower_bound <- unname(quantile(intensity, probs = tail, na.rm = TRUE))
  intensity <- pmax(intensity, lower_bound)
  # The upper bound is computed after the lower clamp, as in the original.
  upper_bound <- unname(quantile(intensity, probs = 1 - tail, na.rm = TRUE))
  intensity <- pmin(intensity, upper_bound)
  data[select, col] <- intensity
}

# Write the header row plus data as tab-separated text.
# NOTE(review): this produces a .txt file, while the next step of the script
# reads "original_data_outliers_removed.xlsx", which no visible code creates —
# presumably converted by hand; confirm.
outfile <- file.path(PROJECT_DIR, "original_data_outliers_removed.txt")
output <- rbind(colnames(data), data)
write(t(output), ncolumns = ncol(output), file = outfile, sep = "\t")
load outlier-treated data set
# Load the outlier-treated data set that the subsets below are built from.
# NOTE(review): the step above writes "original_data_outliers_removed.txt";
# this .xlsx is not created by any code visible in this script — presumably
# converted by hand; confirm.
NCI_OG_outrm <- read_excel("/Users/ewamble/Desktop/Tsion/NCI_MD/Data/patient_pop_data/original_data_outliers_removed.xlsx")
subset data
#TOTAL POPULATION
#analyte columns used in the study, defined once and reused for every population
#(the list previously repeated "IL8" in the slot between CX3CL1 and CD5 and
#omitted CD70, which is plotted further down; CD70 now takes that slot)
analyte_names <- c(
  "IL8", "TNFRSF9", "IL7", "IL6", "MCP1", "MCP4", "IL18", "CXCL1",
  "MCP2", "PDL1", "CD27", "CX3CL1", "CD70", "CD5", "MMP7", "MMP12",
  "IL12", "CSF1", "ARG1", "IL4", "IL5", "CD28", "NOS3", "CD4",
  "IL10", "PTN", "IL12RB1", "VEGFC", "MCP3", "CXCL5", "CXCL11", "PDL2"
)
#pull out important columns from study, keeping complete rows only
analytes_tp <- na.omit(NCI_OG_outrm[, analyte_names])
#controls (case == 0) and prostate cancer cases (case == 1)
control_pop <- subset(NCI_OG_outrm, case == 0)
cancer_pop <- subset(NCI_OG_outrm, case == 1)
analytes_contp <- na.omit(control_pop[, analyte_names])
analytes_cancp <- na.omit(cancer_pop[, analyte_names])
#prostate cancer status (PCa)
#self reported race (European American, African American, and African)
euro_sr <- subset(control_pop, race_num == 1)
afro_am_sr <- subset(control_pop, race_num == 2)
#uses race_num like its siblings; `race` holds text labels, so `race == 3`
#matched no rows
afro_sr <- subset(control_pop, race_num == 3)
pca_euro_sr <- subset(cancer_pop, race_num == 1)
pca_afro_am_sr <- subset(cancer_pop, race_num == 2)
pca_afro_sr <- subset(cancer_pop, race_num == 3)
#diabetic status
normal <- subset(control_pop, diabetes == 0)
diabetic <- subset(control_pop, diabetes == 1)
pca_normal <- subset(cancer_pop, diabetes == 0)
pca_diabetic <- subset(cancer_pop, diabetes == 1)
#bmi status ("normal" here means bmi below 30, obesity means bmi of 30 or more)
normal_bmi <- subset(control_pop, bmi < 30)
obesity_bmi <- subset(control_pop, bmi >= 30)
pca_normal_bmi <- subset(cancer_pop, bmi < 30)
pca_obesity_bmi <- subset(cancer_pop, bmi >= 30)
check data
#generate one boxplot per analyte table to identify outliers visually
boxplot(analytes_tp, main = "total population boxplot")
boxplot(analytes_contp, main = "control population boxplot")
#title typo fixed ("poopulation" -> "population")
boxplot(analytes_cancp, main = "prostate cancer population boxplot")
race specific subset
#SELF-REPORTED RACE STUDY
#Each race group (EA, AA, AFR) is split by diabetes status and by bmi, once for
#controls and once for prostate cancer cases (pca_ prefix).

#diabetic status: diabetes == 0 vs diabetes == 1
EA_normal       <- dplyr::filter(euro_sr, diabetes == 0)
EA_diabetic     <- dplyr::filter(euro_sr, diabetes == 1)
EA_pca_normal   <- dplyr::filter(pca_euro_sr, diabetes == 0)
EA_pca_diabetic <- dplyr::filter(pca_euro_sr, diabetes == 1)

AA_normal       <- dplyr::filter(afro_am_sr, diabetes == 0)
AA_diabetic     <- dplyr::filter(afro_am_sr, diabetes == 1)
AA_pca_normal   <- dplyr::filter(pca_afro_am_sr, diabetes == 0)
AA_pca_diabetic <- dplyr::filter(pca_afro_am_sr, diabetes == 1)

AFR_normal       <- dplyr::filter(afro_sr, diabetes == 0)
AFR_diabetic     <- dplyr::filter(afro_sr, diabetes == 1)
AFR_pca_normal   <- dplyr::filter(pca_afro_sr, diabetes == 0)
AFR_pca_diabetic <- dplyr::filter(pca_afro_sr, diabetes == 1)

#bmi status: bmi below 30 vs bmi of 30 or more
EA_normal_bmi      <- dplyr::filter(euro_sr, bmi < 30)
EA_obesity_bmi     <- dplyr::filter(euro_sr, bmi >= 30)
EA_pca_normal_bmi  <- dplyr::filter(pca_euro_sr, bmi < 30)
EA_pca_obesity_bmi <- dplyr::filter(pca_euro_sr, bmi >= 30)

AA_normal_bmi      <- dplyr::filter(afro_am_sr, bmi < 30)
AA_obesity_bmi     <- dplyr::filter(afro_am_sr, bmi >= 30)
AA_pca_normal_bmi  <- dplyr::filter(pca_afro_am_sr, bmi < 30)
AA_pca_obesity_bmi <- dplyr::filter(pca_afro_am_sr, bmi >= 30)

AFR_normal_bmi      <- dplyr::filter(afro_sr, bmi < 30)
AFR_obesity_bmi     <- dplyr::filter(afro_sr, bmi >= 30)
AFR_pca_normal_bmi  <- dplyr::filter(pca_afro_sr, bmi < 30)
AFR_pca_obesity_bmi <- dplyr::filter(pca_afro_sr, bmi >= 30)
establish comparisons for stats test
# Pairs of self-reported race groups to compare in the plots below.
my_comparisons <- list(
  c("African", "African American"),
  c("African American", "European American"),
  c("African", "European American")
)
Visualization
Here we compare the difference between biomarkers based on self-reported race using violin plots in the control population. IL8
#number of individuals per self-reported race group (controls)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
#plot: IL8 by self-reported race as violins with embedded boxplots
ggplot(control_pop, aes(x = factor(race), y = IL8, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")) +
  #dashed horizontal line at each group's mean
  stat_summary(geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean, width = .75, linetype = "dashed") +
  #group sizes, hard-coded from the count(race) output above
  annotate("text", x = 1, y = 0, label = "n = 654") +
  annotate("text", x = 2, y = 0, label = "n = 374") +
  annotate("text", x = 3, y = 0, label = "n = 454") +
  #pairwise t-test p-values; the test is chosen via `method` (the former
  #`t.test =` argument did not select it, so ggpubr's default test was used)
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  #global ANOVA p-value across all groups
  stat_compare_means(label.y = 25, method = "anova") +
  #remove legend
  theme(legend.position = "none")
TNFRSF9
#number of individuals per self-reported race group (controls)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
#plot: TNFRSF9 by self-reported race as violins with embedded boxplots
ggplot(control_pop, aes(x = factor(race), y = TNFRSF9, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")) +
  #dashed horizontal line at each group's mean
  stat_summary(geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean, width = .75, linetype = "dashed") +
  #group sizes, hard-coded from the count(race) output above
  annotate("text", x = 1, y = 0, label = "n = 654") +
  annotate("text", x = 2, y = 0, label = "n = 374") +
  annotate("text", x = 3, y = 0, label = "n = 454") +
  #pairwise t-test p-values for the pairs in my_comparisons
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  #global ANOVA p-value across all groups
  stat_compare_means(label.y = 15, method = "anova") +
  #remove legend
  theme(legend.position = "none")
IL7
#number of individuals per self-reported race group (controls)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
#plot: IL7 by self-reported race as violins with embedded boxplots
ggplot(control_pop, aes(x = factor(race), y = IL7, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")) +
  #dashed horizontal line at each group's mean
  stat_summary(geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean, width = .75, linetype = "dashed") +
  #group sizes, hard-coded from the count(race) output above
  annotate("text", x = 1, y = 0, label = "n = 654") +
  annotate("text", x = 2, y = 0, label = "n = 374") +
  annotate("text", x = 3, y = 0, label = "n = 454") +
  #pairwise t-test p-values; the test is chosen via `method` (the former
  #`t.test =` argument did not select it, so ggpubr's default test was used)
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  #global ANOVA p-value across all groups
  stat_compare_means(label.y = 15, method = "anova") +
  #remove legend
  theme(legend.position = "none")
IL6
#number of individuals per self-reported race group (controls)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
#plot: IL6 by self-reported race as violins with embedded boxplots
ggplot(control_pop, aes(x = factor(race), y = IL6, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")) +
  #dashed horizontal line at each group's mean
  stat_summary(geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean, width = .75, linetype = "dashed") +
  #group sizes, hard-coded from the count(race) output above
  annotate("text", x = 1, y = 0, label = "n = 654") +
  annotate("text", x = 2, y = 0, label = "n = 374") +
  annotate("text", x = 3, y = 0, label = "n = 454") +
  #pairwise t-test p-values; the test is chosen via `method` (the former
  #`t.test =` argument did not select it, so ggpubr's default test was used)
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  #global ANOVA p-value across all groups
  stat_compare_means(label.y = 25, method = "anova") +
  #remove legend
  theme(legend.position = "none")
MCP1
#number of individuals per self-reported race group (controls)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
#plot: MCP1 by self-reported race as violins with embedded boxplots
ggplot(control_pop, aes(x = factor(race), y = MCP1, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")) +
  #dashed horizontal line at each group's mean
  stat_summary(geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean, width = .75, linetype = "dashed") +
  #group sizes, hard-coded from the count(race) output above
  annotate("text", x = 1, y = 7, label = "n = 654") +
  annotate("text", x = 2, y = 7, label = "n = 374") +
  annotate("text", x = 3, y = 7, label = "n = 454") +
  #pairwise t-test p-values; the test is chosen via `method` (the former
  #`t.test =` argument did not select it, so ggpubr's default test was used)
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  #global ANOVA p-value across all groups
  stat_compare_means(label.y = 20, method = "anova") +
  #remove legend
  theme(legend.position = "none")
IL18
#number of individuals per self-reported race group (controls)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
#plot: IL18 by self-reported race as violins with embedded boxplots
ggplot(control_pop, aes(x = factor(race), y = IL18, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")) +
  #dashed horizontal line at each group's mean
  stat_summary(geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean, width = .75, linetype = "dashed") +
  #group sizes, hard-coded from the count(race) output above
  annotate("text", x = 1, y = 5, label = "n = 654") +
  annotate("text", x = 2, y = 5, label = "n = 374") +
  annotate("text", x = 3, y = 5, label = "n = 454") +
  #pairwise t-test p-values; the test is chosen via `method` (the former
  #`t.test =` argument did not select it, so ggpubr's default test was used)
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  #global ANOVA p-value across all groups
  stat_compare_means(label.y = 17, method = "anova") +
  #remove legend
  theme(legend.position = "none")
CXCL1
#number of individuals per self-reported race group (controls)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
#plot: CXCL1 by self-reported race as violins with embedded boxplots
ggplot(control_pop, aes(x = factor(race), y = CXCL1, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")) +
  #dashed horizontal line at each group's mean
  stat_summary(geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean, width = .75, linetype = "dashed") +
  #group sizes, hard-coded from the count(race) output above
  annotate("text", x = 1, y = 7, label = "n = 654") +
  annotate("text", x = 2, y = 7, label = "n = 374") +
  annotate("text", x = 3, y = 7, label = "n = 454") +
  #pairwise t-test p-values; the test is chosen via `method` (the former
  #`t.test =` argument did not select it, so ggpubr's default test was used)
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  #global ANOVA p-value across all groups
  stat_compare_means(label.y = 17, method = "anova") +
  #remove legend
  theme(legend.position = "none")
MCP2
#number of individuals per self-reported race group (controls)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
#plot: MCP2 by self-reported race as violins with embedded boxplots
ggplot(control_pop, aes(x = factor(race), y = MCP2, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")) +
  #dashed horizontal line at each group's mean
  stat_summary(geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean, width = .75, linetype = "dashed") +
  #group sizes, hard-coded from the count(race) output above
  annotate("text", x = 1, y = 5, label = "n = 654") +
  annotate("text", x = 2, y = 5, label = "n = 374") +
  annotate("text", x = 3, y = 5, label = "n = 454") +
  #pairwise t-test p-values; the test is chosen via `method` (the former
  #`t.test =` argument did not select it, so ggpubr's default test was used)
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  #global ANOVA p-value across all groups
  stat_compare_means(label.y = 16, method = "anova") +
  #remove legend
  theme(legend.position = "none")
PDL1
#number of individuals per self-reported race group (controls)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
#plot: PDL1 by self-reported race as violins with embedded boxplots
ggplot(control_pop, aes(x = factor(race), y = PDL1, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")) +
  #dashed horizontal line at each group's mean
  stat_summary(geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean, width = .75, linetype = "dashed") +
  #group sizes, hard-coded from the count(race) output above
  annotate("text", x = 1, y = 3, label = "n = 654") +
  annotate("text", x = 2, y = 3, label = "n = 374") +
  annotate("text", x = 3, y = 3, label = "n = 454") +
  #pairwise t-test p-values; the test is chosen via `method` (the former
  #`t.test =` argument did not select it, so ggpubr's default test was used)
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  #global ANOVA p-value across all groups
  stat_compare_means(label.y = 15, method = "anova") +
  #remove legend
  theme(legend.position = "none")
CD27
#number of individuals per self-reported race group (controls)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
#plot: CD27 by self-reported race as violins with embedded boxplots
ggplot(control_pop, aes(x = factor(race), y = CD27, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")) +
  #dashed horizontal line at each group's mean
  stat_summary(geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean, width = .75, linetype = "dashed") +
  #group sizes, hard-coded from the count(race) output above
  annotate("text", x = 1, y = 5, label = "n = 654") +
  annotate("text", x = 2, y = 5, label = "n = 374") +
  annotate("text", x = 3, y = 5, label = "n = 454") +
  #pairwise t-test p-values; the test is chosen via `method` (the former
  #`t.test =` argument did not select it, so ggpubr's default test was used)
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  #global ANOVA p-value across all groups
  stat_compare_means(label.y = 15, method = "anova") +
  #remove legend
  theme(legend.position = "none")
CX3CL1
#number of individuals per self-reported race group (controls)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
#plot: CX3CL1 by self-reported race as violins with embedded boxplots
ggplot(control_pop, aes(x = factor(race), y = CX3CL1, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")) +
  #dashed horizontal line at each group's mean
  stat_summary(geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean, width = .75, linetype = "dashed") +
  #group sizes, hard-coded from the count(race) output above
  annotate("text", x = 1, y = 5, label = "n = 654") +
  annotate("text", x = 2, y = 5, label = "n = 374") +
  annotate("text", x = 3, y = 5, label = "n = 454") +
  #pairwise t-test p-values; the test is chosen via `method` (the former
  #`t.test =` argument did not select it, so ggpubr's default test was used)
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  #global ANOVA p-value across all groups
  stat_compare_means(label.y = 13, method = "anova") +
  #remove legend
  theme(legend.position = "none")
CD70
#number of individuals per self-reported race group (controls)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
#plot: CD70 by self-reported race as violins with embedded boxplots
ggplot(control_pop, aes(x = factor(race), y = CD70, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")) +
  #dashed horizontal line at each group's mean
  stat_summary(geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean, width = .75, linetype = "dashed") +
  #group sizes, hard-coded from the count(race) output above
  annotate("text", x = 1, y = 1, label = "n = 654") +
  annotate("text", x = 2, y = 1, label = "n = 374") +
  annotate("text", x = 3, y = 1, label = "n = 454") +
  #pairwise t-test p-values; the test is chosen via `method` (the former
  #`t.test =` argument did not select it, so ggpubr's default test was used)
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  #global ANOVA p-value across all groups
  stat_compare_means(label.y = 10, method = "anova") +
  #remove legend
  theme(legend.position = "none")
CD5
#number of individuals per self-reported race group (controls)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
#plot: CD5 by self-reported race as violins with embedded boxplots
ggplot(control_pop, aes(x = factor(race), y = CD5, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")) +
  #dashed horizontal line at each group's mean
  stat_summary(geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean, width = .75, linetype = "dashed") +
  #group sizes, hard-coded from the count(race) output above
  annotate("text", x = 1, y = 2, label = "n = 654") +
  annotate("text", x = 2, y = 2, label = "n = 374") +
  annotate("text", x = 3, y = 2, label = "n = 454") +
  #pairwise t-test p-values; the test is chosen via `method` (the former
  #`t.test =` argument did not select it, so ggpubr's default test was used)
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  #global ANOVA p-value across all groups
  stat_compare_means(label.y = 12, method = "anova") +
  #remove legend
  theme(legend.position = "none")
MMP7
#number of individuals per self-reported race group (controls)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
#plot: MMP7 by self-reported race as violins with embedded boxplots
ggplot(control_pop, aes(x = factor(race), y = MMP7, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")) +
  #dashed horizontal line at each group's mean
  stat_summary(geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean, width = .75, linetype = "dashed") +
  #group sizes, hard-coded from the count(race) output above
  annotate("text", x = 1, y = 7, label = "n = 654") +
  annotate("text", x = 2, y = 7, label = "n = 374") +
  annotate("text", x = 3, y = 7, label = "n = 454") +
  #pairwise t-test p-values; the test is chosen via `method` (the former
  #`t.test =` argument did not select it, so ggpubr's default test was used)
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  #global ANOVA p-value across all groups
  stat_compare_means(label.y = 17, method = "anova") +
  #remove legend
  theme(legend.position = "none")
MMP12
#number of individuals per self-reported race group (controls)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
#plot: MMP12 by self-reported race as violins with embedded boxplots
ggplot(control_pop, aes(x = factor(race), y = MMP12, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")) +
  #dashed horizontal line at each group's mean
  stat_summary(geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean, width = .75, linetype = "dashed") +
  #group sizes, hard-coded from the count(race) output above
  annotate("text", x = 1, y = 0, label = "n = 654") +
  annotate("text", x = 2, y = 0, label = "n = 374") +
  annotate("text", x = 3, y = 0, label = "n = 454") +
  #pairwise t-test p-values; the test is chosen via `method` (the former
  #`t.test =` argument did not select it, so ggpubr's default test was used)
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  #global ANOVA p-value across all groups
  stat_compare_means(label.y = 15, method = "anova") +
  #remove legend
  theme(legend.position = "none")
IL12
#number of individuals per self-reported race group (controls)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
#plot: IL12 by self-reported race as violins with embedded boxplots
ggplot(control_pop, aes(x = factor(race), y = IL12, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")) +
  #dashed horizontal line at each group's mean
  stat_summary(geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean, width = .75, linetype = "dashed") +
  #group sizes, hard-coded from the count(race) output above
  annotate("text", x = 1, y = 0, label = "n = 654") +
  annotate("text", x = 2, y = 0, label = "n = 374") +
  annotate("text", x = 3, y = 0, label = "n = 454") +
  #pairwise t-test p-values; the test is chosen via `method` (the former
  #`t.test =` argument did not select it, so ggpubr's default test was used)
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  #global ANOVA p-value across all groups
  stat_compare_means(label.y = 15, method = "anova") +
  #remove legend
  theme(legend.position = "none")
CSF1
#number of individuals per self-reported race group (controls)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
#plot: CSF1 by self-reported race as violins with embedded boxplots
ggplot(control_pop, aes(x = factor(race), y = CSF1, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")) +
  #dashed horizontal line at each group's mean
  stat_summary(geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean, width = .75, linetype = "dashed") +
  #group sizes, hard-coded from the count(race) output above
  annotate("text", x = 1, y = 7, label = "n = 654") +
  annotate("text", x = 2, y = 7, label = "n = 374") +
  annotate("text", x = 3, y = 7, label = "n = 454") +
  #pairwise t-test p-values; the test is chosen via `method` (the former
  #`t.test =` argument did not select it, so ggpubr's default test was used)
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  #global ANOVA p-value across all groups
  stat_compare_means(label.y = 11, method = "anova") +
  #remove legend
  theme(legend.position = "none")
ARG1
#number of individuals per self-reported race group (controls)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
#plot: ARG1 by self-reported race as violins with embedded boxplots
ggplot(control_pop, aes(x = factor(race), y = ARG1, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")) +
  #dashed horizontal line at each group's mean
  stat_summary(geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean, width = .75, linetype = "dashed") +
  #group sizes, hard-coded from the count(race) output above
  annotate("text", x = 1, y = 3, label = "n = 654") +
  annotate("text", x = 2, y = 3, label = "n = 374") +
  annotate("text", x = 3, y = 3, label = "n = 454") +
  #pairwise t-test p-values; the test is chosen via `method` (the former
  #`t.test =` argument did not select it, so ggpubr's default test was used)
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  #global ANOVA p-value across all groups
  stat_compare_means(label.y = 10, method = "anova") +
  #remove legend
  theme(legend.position = "none")
IL4
#number of individuals per self-reported race group (controls)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
#plot: IL4 by self-reported race as violins with embedded boxplots
ggplot(control_pop, aes(x = factor(race), y = IL4, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")) +
  #dashed horizontal line at each group's mean
  stat_summary(geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean, width = .75, linetype = "dashed") +
  #group sizes, hard-coded from the count(race) output above
  annotate("text", x = 1, y = 0, label = "n = 654") +
  annotate("text", x = 2, y = 0, label = "n = 374") +
  annotate("text", x = 3, y = 0, label = "n = 454") +
  #pairwise t-test p-values; the test is chosen via `method` (the former
  #`t.test =` argument did not select it, so ggpubr's default test was used)
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  #global ANOVA p-value across all groups
  stat_compare_means(label.y = 8, method = "anova") +
  #remove legend
  theme(legend.position = "none")
IL5
#number of individuals per self-reported race group (controls)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
#plot: IL5 by self-reported race as violins with embedded boxplots
ggplot(control_pop, aes(x = factor(race), y = IL5, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")) +
  #dashed horizontal line at each group's mean
  stat_summary(geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean, width = .75, linetype = "dashed") +
  #group sizes, hard-coded from the count(race) output above
  annotate("text", x = 1, y = 0, label = "n = 654") +
  annotate("text", x = 2, y = 0, label = "n = 374") +
  annotate("text", x = 3, y = 0, label = "n = 454") +
  #pairwise t-test p-values; the test is chosen via `method` (the former
  #`t.test =` argument did not select it, so ggpubr's default test was used)
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  #global ANOVA p-value across all groups
  stat_compare_means(label.y = 12, method = "anova") +
  #remove legend
  theme(legend.position = "none")
CD28
#number of individuals per self-reported race group (controls)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
#plot: CD28 by self-reported race as violins with embedded boxplots
ggplot(control_pop, aes(x = factor(race), y = CD28, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")) +
  #dashed horizontal line at each group's mean
  stat_summary(geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean, width = .75, linetype = "dashed") +
  #group sizes, hard-coded from the count(race) output above
  annotate("text", x = 1, y = 1, label = "n = 654") +
  annotate("text", x = 2, y = 1, label = "n = 374") +
  annotate("text", x = 3, y = 1, label = "n = 454") +
  #pairwise t-test p-values; the test is chosen via `method` (the former
  #`t.test =` argument did not select it, so ggpubr's default test was used)
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  #global ANOVA p-value across all groups
  stat_compare_means(label.y = 6, method = "anova") +
  #remove legend
  theme(legend.position = "none")
NOS3
#number of individuals per self-reported race group (controls)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
#plot: NOS3 by self-reported race as violins with embedded boxplots
ggplot(control_pop, aes(x = factor(race), y = NOS3, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")) +
  #dashed horizontal line at each group's mean
  stat_summary(geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean, width = .75, linetype = "dashed") +
  #group sizes, hard-coded from the count(race) output above
  annotate("text", x = 1, y = 0, label = "n = 654") +
  annotate("text", x = 2, y = 0, label = "n = 374") +
  annotate("text", x = 3, y = 0, label = "n = 454") +
  #pairwise t-test p-values; the test is chosen via `method` (the former
  #`t.test =` argument did not select it, so ggpubr's default test was used)
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  #global ANOVA p-value across all groups
  stat_compare_means(label.y = 5, method = "anova") +
  #remove legend
  theme(legend.position = "none")
CD4
#number of individuals per self-reported race group (controls)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
#plot: CD4 by self-reported race as violins with embedded boxplots
ggplot(control_pop, aes(x = factor(race), y = CD4, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")) +
  #dashed horizontal line at each group's mean
  stat_summary(geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean, width = .75, linetype = "dashed") +
  #group sizes, hard-coded from the count(race) output above
  annotate("text", x = 1, y = 0, label = "n = 654") +
  annotate("text", x = 2, y = 0, label = "n = 374") +
  annotate("text", x = 3, y = 0, label = "n = 454") +
  #pairwise t-test p-values; the test is chosen via `method` (the former
  #`t.test =` argument did not select it, so ggpubr's default test was used)
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  #global ANOVA p-value across all groups
  stat_compare_means(label.y = 6, method = "anova") +
  #remove legend
  theme(legend.position = "none")
IL10
# Number of individuals in each self-reported race group (control population)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
# Violin + box plot of IL10 levels by self-reported race
ggplot(control_pop, aes(x = factor(race), y = IL10, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(
    values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")
  ) +
  # Dashed line at each group's mean: an error bar whose min, centre and max
  # are all the mean, so it collapses to a single horizontal line
  stat_summary(
    geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean,
    width = 0.75, linetype = "dashed"
  ) +
  # Group sizes at the bottom of the plot, hard-coded from the count() output
  annotate("text", x = 1, y = 0, label = "n = 654") +
  annotate("text", x = 2, y = 0, label = "n = 374") +
  annotate("text", x = 3, y = 0, label = "n = 454") +
  # Pairwise t-test p-values for the group pairs listed in my_comparisons.
  # The test has to be passed as `method`: the former `t.test = "t.test"` is
  # not a stat_compare_means() argument, so ggpubr fell back to its default
  # test (Wilcoxon) instead of the intended t-test.
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  # Global one-way ANOVA p-value across all groups, printed at y = 12
  stat_compare_means(label.y = 12, method = "anova") +
  # Remove the legend; fill only duplicates the x-axis grouping
  theme(legend.position = "none")
PTN
# Number of individuals in each self-reported race group (control population)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
# Violin + box plot of PTN levels by self-reported race
ggplot(control_pop, aes(x = factor(race), y = PTN, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(
    values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")
  ) +
  # Dashed line at each group's mean: an error bar whose min, centre and max
  # are all the mean, so it collapses to a single horizontal line
  stat_summary(
    geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean,
    width = 0.75, linetype = "dashed"
  ) +
  # Group sizes at the bottom of the plot, hard-coded from the count() output
  annotate("text", x = 1, y = 0, label = "n = 654") +
  annotate("text", x = 2, y = 0, label = "n = 374") +
  annotate("text", x = 3, y = 0, label = "n = 454") +
  # Pairwise t-test p-values for the group pairs listed in my_comparisons.
  # The test has to be passed as `method`: the former `t.test = "t.test"` is
  # not a stat_compare_means() argument, so ggpubr fell back to its default
  # test (Wilcoxon) instead of the intended t-test.
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  # Global one-way ANOVA p-value across all groups, printed at y = 10
  stat_compare_means(label.y = 10, method = "anova") +
  # Remove the legend; fill only duplicates the x-axis grouping
  theme(legend.position = "none")
IL12RB1
# Number of individuals in each self-reported race group (control population)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
# Violin + box plot of IL12RB1 levels by self-reported race
ggplot(control_pop, aes(x = factor(race), y = IL12RB1, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(
    values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")
  ) +
  # Dashed line at each group's mean: an error bar whose min, centre and max
  # are all the mean, so it collapses to a single horizontal line
  stat_summary(
    geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean,
    width = 0.75, linetype = "dashed"
  ) +
  # Group sizes at the bottom of the plot, hard-coded from the count() output
  annotate("text", x = 1, y = 0, label = "n = 654") +
  annotate("text", x = 2, y = 0, label = "n = 374") +
  annotate("text", x = 3, y = 0, label = "n = 454") +
  # Pairwise t-test p-values for the group pairs listed in my_comparisons.
  # The test has to be passed as `method`: the former `t.test = "t.test"` is
  # not a stat_compare_means() argument, so ggpubr fell back to its default
  # test (Wilcoxon) instead of the intended t-test.
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  # Global one-way ANOVA p-value across all groups, printed at y = 8
  stat_compare_means(label.y = 8, method = "anova") +
  # Remove the legend; fill only duplicates the x-axis grouping
  theme(legend.position = "none")
VEGFC
# Number of individuals in each self-reported race group (control population)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
# Violin + box plot of VEGFC levels by self-reported race
ggplot(control_pop, aes(x = factor(race), y = VEGFC, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(
    values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")
  ) +
  # Dashed line at each group's mean: an error bar whose min, centre and max
  # are all the mean, so it collapses to a single horizontal line
  stat_summary(
    geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean,
    width = 0.75, linetype = "dashed"
  ) +
  # Group sizes at the bottom of the plot, hard-coded from the count() output
  annotate("text", x = 1, y = 0, label = "n = 654") +
  annotate("text", x = 2, y = 0, label = "n = 374") +
  annotate("text", x = 3, y = 0, label = "n = 454") +
  # Pairwise t-test p-values for the group pairs listed in my_comparisons.
  # The test has to be passed as `method`: the former `t.test = "t.test"` is
  # not a stat_compare_means() argument, so ggpubr fell back to its default
  # test (Wilcoxon) instead of the intended t-test.
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  # Global one-way ANOVA p-value across all groups, printed at y = 8
  stat_compare_means(label.y = 8, method = "anova") +
  # Remove the legend; fill only duplicates the x-axis grouping
  theme(legend.position = "none")
MCP3
# Number of individuals in each self-reported race group (control population)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
# Violin + box plot of MCP3 levels by self-reported race
ggplot(control_pop, aes(x = factor(race), y = MCP3, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(
    values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")
  ) +
  # Dashed line at each group's mean: an error bar whose min, centre and max
  # are all the mean, so it collapses to a single horizontal line
  stat_summary(
    geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean,
    width = 0.75, linetype = "dashed"
  ) +
  # Group sizes at the bottom of the plot, hard-coded from the count() output
  annotate("text", x = 1, y = 0, label = "n = 654") +
  annotate("text", x = 2, y = 0, label = "n = 374") +
  annotate("text", x = 3, y = 0, label = "n = 454") +
  # Pairwise t-test p-values for the group pairs listed in my_comparisons.
  # The test has to be passed as `method`: the former `t.test = "t.test"` is
  # not a stat_compare_means() argument, so ggpubr fell back to its default
  # test (Wilcoxon) instead of the intended t-test.
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  # Global one-way ANOVA p-value across all groups, printed at y = 20
  stat_compare_means(label.y = 20, method = "anova") +
  # Remove the legend; fill only duplicates the x-axis grouping
  theme(legend.position = "none")
CXCL5
# Number of individuals in each self-reported race group (control population)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
# Violin + box plot of CXCL5 levels by self-reported race
ggplot(control_pop, aes(x = factor(race), y = CXCL5, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(
    values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")
  ) +
  # Dashed line at each group's mean: an error bar whose min, centre and max
  # are all the mean, so it collapses to a single horizontal line
  stat_summary(
    geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean,
    width = 0.75, linetype = "dashed"
  ) +
  # Group sizes, hard-coded from the count() output; placed at y = 5 here
  annotate("text", x = 1, y = 5, label = "n = 654") +
  annotate("text", x = 2, y = 5, label = "n = 374") +
  annotate("text", x = 3, y = 5, label = "n = 454") +
  # Pairwise t-test p-values for the group pairs listed in my_comparisons.
  # The test has to be passed as `method`: the former `t.test = "t.test"` is
  # not a stat_compare_means() argument, so ggpubr fell back to its default
  # test (Wilcoxon) instead of the intended t-test.
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  # Global one-way ANOVA p-value across all groups, printed at y = 22
  stat_compare_means(label.y = 22, method = "anova") +
  # Remove the legend; fill only duplicates the x-axis grouping
  theme(legend.position = "none")
CXCL11
# Number of individuals in each self-reported race group (control population)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
# Violin + box plot of CXCL11 levels by self-reported race
ggplot(control_pop, aes(x = factor(race), y = CXCL11, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(
    values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")
  ) +
  # Dashed line at each group's mean: an error bar whose min, centre and max
  # are all the mean, so it collapses to a single horizontal line
  stat_summary(
    geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean,
    width = 0.75, linetype = "dashed"
  ) +
  # Group sizes, hard-coded from the count() output; placed at y = 2 here
  annotate("text", x = 1, y = 2, label = "n = 654") +
  annotate("text", x = 2, y = 2, label = "n = 374") +
  annotate("text", x = 3, y = 2, label = "n = 454") +
  # Pairwise t-test p-values for the group pairs listed in my_comparisons.
  # The test has to be passed as `method`: the former `t.test = "t.test"` is
  # not a stat_compare_means() argument, so ggpubr fell back to its default
  # test (Wilcoxon) instead of the intended t-test.
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  # Global one-way ANOVA p-value across all groups, printed at y = 20
  stat_compare_means(label.y = 20, method = "anova") +
  # Remove the legend; fill only duplicates the x-axis grouping
  theme(legend.position = "none")
PDL2
# Number of individuals in each self-reported race group (control population)
control_pop %>%
  count(race)
## # A tibble: 3 × 2
## race n
## <chr> <int>
## 1 African 654
## 2 African American 374
## 3 European American 454
# Violin + box plot of PDL2 levels by self-reported race
ggplot(control_pop, aes(x = factor(race), y = PDL2, fill = race)) +
  geom_violin(trim = FALSE) +
  xlab("") +
  theme_minimal() +
  geom_boxplot(width = 0.1) +
  scale_fill_manual(
    values = c("cadetblue3", "deepskyblue4", "darkolivegreen2")
  ) +
  # Dashed line at each group's mean: an error bar whose min, centre and max
  # are all the mean, so it collapses to a single horizontal line
  stat_summary(
    geom = "errorbar", fun.min = mean, fun = mean, fun.max = mean,
    width = 0.75, linetype = "dashed"
  ) +
  # Group sizes at the bottom of the plot, hard-coded from the count() output
  annotate("text", x = 1, y = 0, label = "n = 654") +
  annotate("text", x = 2, y = 0, label = "n = 374") +
  annotate("text", x = 3, y = 0, label = "n = 454") +
  # Pairwise t-test p-values for the group pairs listed in my_comparisons.
  # The test has to be passed as `method`: the former `t.test = "t.test"` is
  # not a stat_compare_means() argument, so ggpubr fell back to its default
  # test (Wilcoxon) instead of the intended t-test.
  stat_compare_means(comparisons = my_comparisons, method = "t.test") +
  # Global one-way ANOVA p-value across all groups, printed at y = 12
  stat_compare_means(label.y = 12, method = "anova") +
  # Remove the legend; fill only duplicates the x-axis grouping
  theme(legend.position = "none")
More Visualization
Here we examine how biomarker levels differ by ancestry in the control population.