rm(list = ls())
#load libraries
library(readr)
library(ggplot2)
library(tidyr)
library(dplyr)
library(reshape2)
library(gplots)
library(ggpubr)
library(ggthemr)
library(forcats)
library(ggpmisc)
library(cowplot)
library(ggtext)
#set theme
ggthemr('fresh')
#import data
qPCR<-read.csv("qpCR abundances v2.csv")
isolate<-read.csv("Isolate abundances v2.csv")
#a few calculations
#percent E. coli by qPCR (uida/total bac copies)
qPCR$perc_E.coli<-qPCR$UIDA_copies_ng/qPCR$Total_Bacteria_copies_ng * 100
#convert isolate rel abundances to percents
isolate$MG.abundance.perc<-isolate$MG.abundance *100
#filter and melt data for symp/asymp abundance barplots
#qPCR
qPCR_melt<-qPCR %>% subset(Include=='y' & pathotype=="pathpos") %>%
melt(id.vars=c("Sample_ID", "cc", "symp.asymp", "pathotype", "UIDA_stdev"), variable.name=("measurement"), value.name="value")
qPCR_melt$value<-as.numeric(qPCR_melt$value)
#isolate
isolate_melt<-isolate %>% subset(Include=="yes") %>%
melt(id.vars=c("E..coli.isolate.ID", "Sample_ID", "cc", "symp.asymp", "Pathotype.match", "Rotavirus."), variable.name="measurement", valu.name="value")%>%
subset(measurement=="MG.abundance.perc")
isolate_melt$value<-as.numeric(isolate_melt$value)
#qPCR plots
#uida (total E. coli target)
#barplot
df_uida<-qPCR_melt %>%
subset(measurement=="UIDA_copies_ng") %>%
drop_na("value")
df_uida.mean = df_uida %>%
group_by(symp.asymp) %>%
mutate(mean = mean(value))
uida_bar<-qPCR_melt %>%
subset(measurement=="UIDA_copies_ng") %>%
drop_na("value")%>%
arrange(value) %>%
mutate(Sample_ID=fct_reorder(Sample_ID, value)) %>%
mutate(Sample_ID=fct_reorder(Sample_ID, symp.asymp)) %>%
ggplot(aes(fill=symp.asymp, y=value, x=Sample_ID))+
geom_bar(stat="identity")+
scale_fill_manual(values=c("Symptomatic"="coral2", "Asymptomatic"="steelblue3"))+
ggtitle("E. coli qPCR abundance")+
ylab("E. coli abundance (uida copies/ng)")+
xlab(NULL)+
geom_errorbar(data=df_uida.mean, aes(Sample_ID, ymax = mean, ymin = mean), size=0.5, linetype = "longdash", inherit.aes = F, width = 1, color="black")+
geom_errorbar(aes(ymin=value-UIDA_stdev, ymax=value+UIDA_stdev, width=0.3), color="black")+
theme(panel.grid.major=element_blank(), panel.grid.minor=element_blank(), axis.text.x=element_text(angle=90, vjust=0.5, hjust=1), legend.title=element_blank(), panel.border=element_rect(color="black", fill=NA, size=0.5))
#boxplot
uida_box<-qPCR_melt %>%
subset(measurement=="UIDA_copies_ng") %>%
drop_na("value")%>%
ggplot(aes(fill=symp.asymp, y=value, x=symp.asymp))+
geom_boxplot()+
scale_fill_manual(values=c("Symptomatic"="coral2", "Asymptomatic"="steelblue3"))+
ylab("% abundance")+
xlab(NULL)+
#stat_compare_means(method="wilcox.test", label.x=0.53)+
theme(panel.grid.major=element_blank(), panel.grid.minor=element_blank(), legend.position="none", panel.border=element_rect(color="black", fill=NA, size=0.5), axis.text.x=(element_text(size=8)))
#perc E.coli (uida/total back 16S target)
#barplot
df_perc_E.coli<-qPCR_melt %>%
subset(measurement=="perc_E.coli") %>%
drop_na("value")
df_perc_E.coli.mean = df_perc_E.coli %>%
group_by(symp.asymp) %>%
mutate(mean = mean(value))
perc_E.coli_bar<-qPCR_melt %>%
subset(measurement=="perc_E.coli") %>%
drop_na("value")%>%
arrange(value) %>%
mutate(Sample_ID=fct_reorder(Sample_ID, value)) %>%
mutate(Sample_ID=fct_reorder(Sample_ID, symp.asymp)) %>%
ggplot(aes(fill=symp.asymp, y=value, x=Sample_ID))+
geom_bar(stat="identity")+
scale_fill_manual(values=c("Symptomatic"="coral2", "Asymptomatic"="steelblue3"), name="DEC infection type")+
ggtitle(expression(paste(italic("E. coli")," qPCR abundance")))+
ylab("% abundance")+
xlab(NULL)+
geom_errorbar(data=df_perc_E.coli.mean, aes(Sample_ID, ymax = mean, ymin = mean), size=0.5, linetype = "longdash", inherit.aes = F, width = 1, color="black")+
scale_y_continuous(breaks=seq(0,8,4))+
theme(panel.grid.major=element_blank(), panel.grid.minor=element_blank(), axis.text.x=element_text(angle=90, vjust=0.5, hjust=1), panel.border=element_rect(color="black", fill=NA, size=0.5), legend.position="none",)
#boxplot
perc_E.coli_box<-qPCR_melt %>%
subset(measurement=="perc_E.coli") %>%
drop_na("value")%>%
ggplot(aes(fill=symp.asymp, y=value, x=symp.asymp))+
geom_boxplot()+
scale_fill_manual(values=c("Symptomatic"="coral2", "Asymptomatic"="steelblue3"), name="DEC infection type")+
xlab(NULL)+
scale_y_continuous(breaks=seq(0,8,4))+
theme(panel.grid.major=element_blank(), panel.grid.minor=element_blank(), legend.position="none", panel.border=element_rect(color="black", fill=NA, size=0.5), axis.title.y=element_blank(), axis.text.x=(element_text(size=8)))
uida_plot<- ggdraw() +
draw_plot(uida_bar) +
draw_plot(uida_box, x = 0.675, y = 0.55, width = 0.25, height = 0.325)
uida_plot
perc_E.coli_plot<- ggdraw() +
draw_plot(perc_E.coli_bar) +
draw_plot(perc_E.coli_box, x = 0.675, y = 0.55, width = 0.25, height = 0.325)
perc_E.coli_plot
#Isolate abundance plots
#isolate abundance
#barplot
df_isolate.mean = isolate_melt %>%
group_by(symp.asymp) %>%
mutate(mean = mean(value))
isolate_bar<-isolate_melt %>%
#arrange(value) %>%
mutate(Sample_ID=fct_reorder(Sample_ID, value)) %>%
mutate(Sample_ID=fct_reorder(Sample_ID, symp.asymp)) %>%
ggplot(aes(fill=symp.asymp, y=value, x=Sample_ID))+
geom_bar(stat="identity")+
scale_fill_manual(values=c("Symptomatic"="coral2", "Asymptomatic"="steelblue3"), name="DEC infection type")+
ggtitle("DEC isolate metagenome abundance")+
ylab("% abundance")+
xlab(NULL)+
geom_errorbar(data=df_isolate.mean, aes(Sample_ID, ymax = mean, ymin = mean), size=0.5, linetype = "longdash", inherit.aes = F, width = 1, color="black")+
theme(panel.grid.major=element_blank(), panel.grid.minor=element_blank(), axis.text.x=element_text(angle=90, vjust=0.5, hjust=1), panel.border=element_rect(color="black", fill=NA, size=0.5), legend.position="none", plot.title=element_text(face="plain"))
#boxplot
isolate_box<-isolate_melt %>%
ggplot(aes(fill=symp.asymp, y=value, x=symp.asymp))+
geom_boxplot()+
scale_fill_manual(values=c("Symptomatic"="coral2", "Asymptomatic"="steelblue3"), name="DEC infection type")+
xlab(NULL)+
theme(panel.grid.major=element_blank(), panel.grid.minor=element_blank(), legend.position="none", panel.border=element_rect(color="black", fill=NA, size=0.5), axis.title.y=element_blank(), axis.text.x=(element_text(size=8)))
isolate_plot<-ggdraw() +
draw_plot(isolate_bar) +
draw_plot(isolate_box, x = 0.675, y = 0.55, width = 0.25, height = 0.325)
isolate_plot
#qPCR stats
#Total E. coli (uida gene copies) symp.asymp comparison
qPCR_melt_symp_uida<-qPCR_melt %>%
subset(symp.asymp=="Symptomatic" & measurement=="UIDA_copies_ng") %>%
drop_na("value")
qPCR_melt_asymp_uida<-qPCR_melt %>%
subset(symp.asymp=="Asymptomatic" & measurement=="UIDA_copies_ng") %>%
drop_na("value")
wilcox.test(qPCR_melt_symp_uida$value, qPCR_melt_asymp_uida$value)
##
## Wilcoxon rank sum exact test
##
## data: qPCR_melt_symp_uida$value and qPCR_melt_asymp_uida$value
## W = 231, p-value = 0.1412
## alternative hypothesis: true location shift is not equal to 0
#Percent E. coli symp.asymp comparison
qPCR_melt_symp_perc<-qPCR_melt %>%
subset(symp.asymp=="Symptomatic" & measurement=="perc_E.coli") %>%
drop_na("value")
#Percent E. coli symp.asymp comparison
qPCR_melt_asymp_perc<-qPCR_melt %>%
subset(symp.asymp=="Asymptomatic" & measurement=="perc_E.coli") %>%
drop_na("value")
wilcox.test(qPCR_melt_symp_perc$value, qPCR_melt_asymp_perc$value)
##
## Wilcoxon rank sum exact test
##
## data: qPCR_melt_symp_perc$value and qPCR_melt_asymp_perc$value
## W = 189, p-value = 0.1114
## alternative hypothesis: true location shift is not equal to 0
#isolate stats
isolate_melt_symp<-isolate_melt %>%
subset(symp.asymp=="Symptomatic") %>%
drop_na("value")
isolate_melt_asymp<-isolate_melt %>%
subset(symp.asymp=="Asymptomatic") %>%
drop_na("value")
wilcox.test(isolate_melt_symp$value, isolate_melt_asymp$value)
##
## Wilcoxon rank sum exact test
##
## data: isolate_melt_symp$value and isolate_melt_asymp$value
## W = 513, p-value = 0.008488
## alternative hypothesis: true location shift is not equal to 0
#qPCR and metagenome abundance comparisons
#join abundance data
isolate_qPCR<- left_join(isolate, qPCR, by=c("Sample_ID"))
#formula
formula <- y ~ x
#E. coli by qPCR v. E.coli islate MG abundance comparison
isolate_qPCR_scat_uida<-ggplot(isolate_qPCR, aes(x=UIDA_copies_ng, y=MG.abundance.perc))+
geom_point(size=3)+
xlab("qPCR abundance (uida copies/ng)")+
ylab("isolate metagenome abundance (%)")+
geom_smooth(method=lm, linetype="dashed", color="black", se=FALSE)+
stat_poly_eq(aes(label = paste(..rr.label..)), formula = formula, parse = TRUE, label.y.npc=.99, label.x.npc=0.03)+
stat_fit_glance(method = 'lm', method.args = list(formula = formula), geom = 'text', aes(label = paste("p = ", signif(..p.value.., digits = 2), sep = "")))+
theme(panel.grid.major=element_blank(), panel.grid.minor=element_blank(), legend.position="none", panel.border=element_rect(color="black", fill=NA, size=0.5))
isolate_qPCR_scat_uida
#percent E. coli
isolate_qPCR_scat_perc<-ggplot(isolate_qPCR, aes(x=perc_E.coli, y=MG.abundance.perc))+
geom_point(size=3)+
xlab(expression(paste(italic("E. coli")," qPCR abundance (%)")))+
ylab("DEC isolate metagenome abundance (%)")+
geom_smooth(method=lm, linetype="dashed", color="black", se=FALSE)+
stat_poly_eq(aes(label = paste(..rr.label..)), formula = formula, parse = TRUE, label.y.npc=.99, label.x.npc=0.03)+
stat_fit_glance(method = 'lm', method.args = list(formula = formula), geom = 'text', aes(label = paste("p = ", signif(..p.value.., digits = 2), sep = "")))+
theme(panel.grid.major=element_blank(), panel.grid.minor=element_blank(), legend.position="none", panel.border=element_rect(color="black", fill=NA, size=0.5))
isolate_qPCR_scat_perc
#Spearman correlations
corr_isolate_uida<-cor.test(isolate_qPCR$UIDA_copies_ng, isolate_qPCR$MG.abundance, method=c("pearson"), use="complete.obs")
corr_isolate_uida
##
## Pearson's product-moment correlation
##
## data: isolate_qPCR$UIDA_copies_ng and isolate_qPCR$MG.abundance
## t = 4.8115, df = 39, p-value = 2.266e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3726440 0.7728579
## sample estimates:
## cor
## 0.6103243
corr_isolate_perc_E.coli<-cor.test(isolate_qPCR$perc_E.coli, isolate_qPCR$MG.abundance, method=c("pearson"), use="complete.obs")
corr_isolate_perc_E.coli
##
## Pearson's product-moment correlation
##
## data: isolate_qPCR$perc_E.coli and isolate_qPCR$MG.abundance
## t = 17.389, df = 35, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8982262 0.9724342
## sample estimates:
## cor
## 0.9467119
#extract legend
isolate_legend<-isolate_melt %>%
#arrange(value) %>%
mutate(Sample_ID=fct_reorder(Sample_ID, value)) %>%
mutate(Sample_ID=fct_reorder(Sample_ID, symp.asymp)) %>%
ggplot(aes(fill=symp.asymp, y=value, x=Sample_ID))+
geom_bar(stat="identity")+
scale_fill_manual(values=c("Symptomatic"="coral2", "Asymptomatic"="steelblue3"), name="DEC infection type")+
ggtitle("DEC isolate metagenome abundance")+
ylab("% abundance")+
xlab(NULL)+
geom_errorbar(data=df_isolate.mean, aes(Sample_ID, ymax = mean, ymin = mean), size=0.5, linetype = "longdash", inherit.aes = F, width = 1, color="black")+
theme(panel.grid.major=element_blank(), panel.grid.minor=element_blank(), axis.text.x=element_text(angle=90, vjust=0.5, hjust=1), panel.border=element_rect(color="black", fill=NA, size=0.5), legend.position="bottom",legend.title=element_text(face="bold"))
legend<-get_legend(isolate_legend)
l<-as_ggplot(legend)
l
#multiplots
#barplots
plot_grid(isolate_bar, uida_bar, perc_E.coli_bar, ncol=2)
#boxplots
plot_grid(isolate_box, uida_box, perc_E.coli_box, ncol=2)
#comparisons
isolate_qPCR_scat_perc
#inset multiplot
p<-plot_grid(isolate_plot,
perc_E.coli_plot,
l,
ncol=1, align="hv",
labels=c("A", "B", ""),
label_size=20,
rel_heights = c(1, 1, 0.25),
rel_widths=c(1, .9, 1))
q<-plot_grid(p)+
draw_text("Wilcoxon, p=0.0085", x = 0.1725, y = .925, size = 10)+
draw_text("Wilcoxon, p=0.11", x=0.175, y=0.475, size = 10)
q