rm(list = ls())

#load libraries

library(vegan)
library(reshape2)
library(dplyr)
library(kableExtra)
library(ggplot2)
library(gplots)
library(ComplexHeatmap)
library(circlize)
library(stringr)
library(forcats)
library(ggpubr)
library(cowplot)
library(RColorBrewer)
library(tibble)

#set theme

theme_set(theme_bw())

#import data row names for metadata should be in the same order as column names for VFDB table

#metadata
meta<-read.table(file="VFDB_metadata.tsv", sep='\t', header=TRUE)

#VFDB relative abundances
VFDB_tab_trans<-read.table(file="VFDB_EcoZUR_MGs_transformed.tsv", sep='\t', header=TRUE, row.names=1, quote="")

#filter rotavirus-positive samples and VFDB genes present in 10 or fewer metagenomes (~10% of metagenomes)

#remove rotavirus-positive samples
meta<-subset(meta, metagenome_read_file != "MG_17_bmtagger_filtered_out" & metagenome_read_file != "MG_4_bmtagger_filtered_out"& metagenome_read_file != "MG_53_CoupledReads_filtered"& metagenome_read_file != "MG_54_CoupledReads_filtered"& metagenome_read_file != "MG_56_CoupledReads_filtered"& metagenome_read_file != "MG_8_bmtagger_filtered_out"& metagenome_read_file != "MG_9_bmtagger_filtered_out"& metagenome_read_file != "MG_7_bmtagger_filtered_out")
rownames(meta)<-NULL

rota_neg<-c('B101_bmtagger_filtered_out','B147_bmtagger_filtered_out','B259_bmtagger_filtered_out','B293_bmtagger_filtered_out','B312_bmtagger_filtered_out','B329_bmtagger_filtered','B46_bmtagger_filtered_out','C26_bmtagger_filtered_out','C46_bmtagger_filtered_out','C71_bmtagger_filtered_out','E119_bmtagger_filtered_out','E130_bmtagger_filtered_out','E132_bmtagger_filtered_out','E167_bmtagger_filtered2_out','E55_bmtagger_filtered_out','E70_bmtagger_filtered_out','E82_bmtagger_filtered_out','MG_10_bmtagger_filtered_out','MG_11_bmtagger_filtered_out','MG_12_bmtagger_filtered_out','MG_13_bmtagger_filtered_out','MG_14_bmtagger_filtered_out','MG_15_bmtagger_filtered_out','MG_16_bmtagger_filtered_out','MG_18_bmtagger_filtered_out','MG_19_bmtagger_filtered_out','MG_1_bmtagger_filtered_out','MG_20_bmtagger_filtered_out','MG_21_bmtagger_filtered_out','MG_22_bmtagger_filtered_out','MG_23_bmtagger_filtered_out','MG_24_bmtagger_filtered_out','MG_25_bmtagger_filtered_out','MG_26_bmtagger_filtered_out','MG_27_CoupledReads_filtered','MG_28_bmtagger_filtered_out','MG_29_bmtagger_filtered_out','MG_2_CoupledReads_filtered','MG_30_bmtagger_filtered_out','MG_31_human_reads_filtered_out','MG_32_bmtagger_filtered_out','MG_33_bmtagger_filtered_out','MG_34_bmtagger_filtered_out','MG_35_bmtagger_filtered_out','MG_36_bmtagger_filtered_out','MG_37_CoupledReads_filtered','MG_38_CoupledReads_filtered','MG_39_CoupledReads_filtered','MG_3_bmtagger_filtered_out','MG_40_CoupledReads_filtered','MG_41_CoupledReads_filtered','MG_42_CoupledReads_filtered','MG_43_CoupledReads_filtered','MG_44_CoupledReads_filtered','MG_45_CoupledReads_filtered','MG_46_CoupledReads_filtered','MG_47_CoupledReads_filtered','MG_48_CoupledReads_filtered','MG_49_CoupledReads_filtered','MG_50','MG_51_CoupledReads_filtered','MG_52','MG_55_CoupledReads_filtered','MG_57_bmtagger_filtered_out','MG_58_bmtagger_filtered_out','MG_59_CoupledReads_filtered','MG_5_bmtagger_filtered_out','MG_60_CoupledReads_filtered','MG_61_CoupledReads_filtered','MG_62_CoupledReads_filtered','MG_63_CoupledReads_filtered','MG_64_CoupledReads_filtered','MG_65_CoupledReads_filtered','MG_66_CoupledReads_filtered','MG_67_CoupledReads_filtered','MG_68_CoupledReads_filtered','MG_69_CoupledReads_filtered','MG_6_bmtagger_filtered_out','MG_70_CoupledReads_filtered','MG_71_CoupledReads_filtered','MG_72_CoupledReads_filtered','MG_73_CoupledReads_filtered','MG_74_CoupledReads_filtered','MG_75_CoupledReads_filtered','MG_76_CoupledReads_filtered','MG_77_CoupledReads_filtered','MG_78_CoupledReads_filtered','MG_79_CoupledReads_filtered','MG_80_CoupledReads_filtered','MG_81_CoupledReads_filtered','MG_82_CoupledReads_filtered','MG_83_CoupledReads_filtered','MG_84_CoupledReads_filtered','MG_85_CoupledReads_filtered','MG_86_CoupledReads_filtered','MG_87_Coupled_Reads_filtered','MG_88_Coupled_Reads_filtered','Q199_bmtagger_filtered_out','Q312_bmtagger_filtered_out','R119_bmtagger_filtered_out','R55_bmtagger_filtered_out','R84_bmtagger_filtered_out','R85_bmtagger_filtered_out','R8_bmtagger_filtered_out')

VFDB_tab_trans<-VFDB_tab_trans[,rota_neg]

#filter genes found in <10 metagenomes
VFDB_tab_trans<-VFDB_tab_trans[ rowSums(VFDB_tab_trans > 0) >= 10, ]
VFDB_tab_trans_t<-t(VFDB_tab_trans)

#subset data by DEC infection and diarrhea (cc) status

#metadata
meta_pathpos<-subset(meta, Pathotype=="pathotype+")
meta_pathneg<-subset(meta, Pathotype=="pathotype-")
  
meta_case<-subset(meta, Clinical_description=="case")
meta_cont<-subset(meta, Clinical_description=="control")

#Make vectors with sample names (column headers) belonging to each group (make sure order is the same as metadata rowname order)
pathpos<-c('B101_bmtagger_filtered_out','B147_bmtagger_filtered_out','B259_bmtagger_filtered_out','B312_bmtagger_filtered_out','B329_bmtagger_filtered','B46_bmtagger_filtered_out','C26_bmtagger_filtered_out','C46_bmtagger_filtered_out','C71_bmtagger_filtered_out','E119_bmtagger_filtered_out','E130_bmtagger_filtered_out','E132_bmtagger_filtered_out','E167_bmtagger_filtered2_out','E55_bmtagger_filtered_out','E70_bmtagger_filtered_out','MG_10_bmtagger_filtered_out','MG_11_bmtagger_filtered_out','MG_12_bmtagger_filtered_out','MG_13_bmtagger_filtered_out','MG_15_bmtagger_filtered_out','MG_16_bmtagger_filtered_out','MG_18_bmtagger_filtered_out','MG_1_bmtagger_filtered_out','MG_21_bmtagger_filtered_out','MG_22_bmtagger_filtered_out','MG_23_bmtagger_filtered_out','MG_24_bmtagger_filtered_out','MG_25_bmtagger_filtered_out','MG_26_bmtagger_filtered_out','MG_27_CoupledReads_filtered','MG_29_bmtagger_filtered_out','MG_30_bmtagger_filtered_out','MG_31_human_reads_filtered_out','MG_32_bmtagger_filtered_out','MG_33_bmtagger_filtered_out','MG_34_bmtagger_filtered_out','MG_35_bmtagger_filtered_out','MG_3_bmtagger_filtered_out','MG_40_CoupledReads_filtered','MG_41_CoupledReads_filtered','MG_42_CoupledReads_filtered','MG_45_CoupledReads_filtered','MG_46_CoupledReads_filtered','MG_47_CoupledReads_filtered','MG_48_CoupledReads_filtered','MG_51_CoupledReads_filtered','MG_5_bmtagger_filtered_out','MG_6_bmtagger_filtered_out','Q199_bmtagger_filtered_out','Q312_bmtagger_filtered_out','R119_bmtagger_filtered_out','R55_bmtagger_filtered_out','R84_bmtagger_filtered_out','R8_bmtagger_filtered_out')
pathneg<-c('B293_bmtagger_filtered_out','E82_bmtagger_filtered_out','MG_14_bmtagger_filtered_out','MG_19_bmtagger_filtered_out','MG_20_bmtagger_filtered_out','MG_28_bmtagger_filtered_out','MG_2_CoupledReads_filtered','MG_36_bmtagger_filtered_out','MG_37_CoupledReads_filtered','MG_38_CoupledReads_filtered','MG_39_CoupledReads_filtered','MG_43_CoupledReads_filtered','MG_44_CoupledReads_filtered','MG_49_CoupledReads_filtered','MG_50','MG_52','MG_55_CoupledReads_filtered','MG_57_bmtagger_filtered_out','MG_58_bmtagger_filtered_out','MG_59_CoupledReads_filtered','MG_60_CoupledReads_filtered','MG_61_CoupledReads_filtered','MG_62_CoupledReads_filtered','MG_63_CoupledReads_filtered','MG_64_CoupledReads_filtered','MG_65_CoupledReads_filtered','MG_66_CoupledReads_filtered','MG_67_CoupledReads_filtered','MG_68_CoupledReads_filtered','MG_69_CoupledReads_filtered','MG_70_CoupledReads_filtered','MG_71_CoupledReads_filtered','MG_72_CoupledReads_filtered','MG_73_CoupledReads_filtered','MG_74_CoupledReads_filtered','MG_75_CoupledReads_filtered','MG_76_CoupledReads_filtered','MG_77_CoupledReads_filtered','MG_78_CoupledReads_filtered','MG_79_CoupledReads_filtered','MG_80_CoupledReads_filtered','MG_81_CoupledReads_filtered','MG_82_CoupledReads_filtered','MG_83_CoupledReads_filtered','MG_84_CoupledReads_filtered','MG_85_CoupledReads_filtered','MG_86_CoupledReads_filtered','MG_87_Coupled_Reads_filtered','MG_88_Coupled_Reads_filtered','R85_bmtagger_filtered_out')

case<-c('MG_10_bmtagger_filtered_out','MG_11_bmtagger_filtered_out','MG_12_bmtagger_filtered_out','MG_13_bmtagger_filtered_out','MG_14_bmtagger_filtered_out','MG_15_bmtagger_filtered_out','MG_16_bmtagger_filtered_out','MG_18_bmtagger_filtered_out','MG_19_bmtagger_filtered_out','MG_1_bmtagger_filtered_out','MG_20_bmtagger_filtered_out','MG_21_bmtagger_filtered_out','MG_22_bmtagger_filtered_out','MG_23_bmtagger_filtered_out','MG_24_bmtagger_filtered_out','MG_25_bmtagger_filtered_out','MG_26_bmtagger_filtered_out','MG_27_CoupledReads_filtered','MG_28_bmtagger_filtered_out','MG_29_bmtagger_filtered_out','MG_30_bmtagger_filtered_out','MG_31_human_reads_filtered_out','MG_32_bmtagger_filtered_out','MG_33_bmtagger_filtered_out','MG_34_bmtagger_filtered_out','MG_35_bmtagger_filtered_out','MG_36_bmtagger_filtered_out','MG_3_bmtagger_filtered_out','MG_55_CoupledReads_filtered','MG_57_bmtagger_filtered_out','MG_58_bmtagger_filtered_out','MG_59_CoupledReads_filtered','MG_5_bmtagger_filtered_out','MG_6_bmtagger_filtered_out','MG_76_CoupledReads_filtered','MG_77_CoupledReads_filtered','MG_78_CoupledReads_filtered','MG_79_CoupledReads_filtered','MG_80_CoupledReads_filtered','MG_81_CoupledReads_filtered','MG_82_CoupledReads_filtered','MG_83_CoupledReads_filtered','MG_84_CoupledReads_filtered','MG_85_CoupledReads_filtered','MG_86_CoupledReads_filtered','MG_87_Coupled_Reads_filtered','MG_88_Coupled_Reads_filtered')
control<-c('B101_bmtagger_filtered_out','B147_bmtagger_filtered_out','B259_bmtagger_filtered_out','B293_bmtagger_filtered_out','B312_bmtagger_filtered_out','B329_bmtagger_filtered','B46_bmtagger_filtered_out','C26_bmtagger_filtered_out','C46_bmtagger_filtered_out','C71_bmtagger_filtered_out','E119_bmtagger_filtered_out','E130_bmtagger_filtered_out','E132_bmtagger_filtered_out','E167_bmtagger_filtered2_out','E55_bmtagger_filtered_out','E70_bmtagger_filtered_out','E82_bmtagger_filtered_out','MG_2_CoupledReads_filtered','MG_37_CoupledReads_filtered','MG_38_CoupledReads_filtered','MG_39_CoupledReads_filtered','MG_40_CoupledReads_filtered','MG_41_CoupledReads_filtered','MG_42_CoupledReads_filtered','MG_43_CoupledReads_filtered','MG_44_CoupledReads_filtered','MG_45_CoupledReads_filtered','MG_46_CoupledReads_filtered','MG_47_CoupledReads_filtered','MG_48_CoupledReads_filtered','MG_49_CoupledReads_filtered','MG_50','MG_51_CoupledReads_filtered','MG_52','MG_60_CoupledReads_filtered','MG_61_CoupledReads_filtered','MG_62_CoupledReads_filtered','MG_63_CoupledReads_filtered','MG_64_CoupledReads_filtered','MG_65_CoupledReads_filtered','MG_66_CoupledReads_filtered','MG_67_CoupledReads_filtered','MG_68_CoupledReads_filtered','MG_69_CoupledReads_filtered','MG_70_CoupledReads_filtered','MG_71_CoupledReads_filtered','MG_72_CoupledReads_filtered','MG_73_CoupledReads_filtered','MG_74_CoupledReads_filtered','MG_75_CoupledReads_filtered','Q199_bmtagger_filtered_out','Q312_bmtagger_filtered_out','R119_bmtagger_filtered_out','R55_bmtagger_filtered_out','R84_bmtagger_filtered_out','R85_bmtagger_filtered_out','R8_bmtagger_filtered_out')

pathpos_case<-c('MG_10_bmtagger_filtered_out','MG_11_bmtagger_filtered_out','MG_12_bmtagger_filtered_out','MG_13_bmtagger_filtered_out','MG_15_bmtagger_filtered_out','MG_16_bmtagger_filtered_out','MG_18_bmtagger_filtered_out','MG_1_bmtagger_filtered_out','MG_21_bmtagger_filtered_out','MG_22_bmtagger_filtered_out','MG_23_bmtagger_filtered_out','MG_24_bmtagger_filtered_out','MG_25_bmtagger_filtered_out','MG_26_bmtagger_filtered_out','MG_27_CoupledReads_filtered','MG_29_bmtagger_filtered_out','MG_30_bmtagger_filtered_out','MG_31_human_reads_filtered_out','MG_32_bmtagger_filtered_out','MG_33_bmtagger_filtered_out','MG_34_bmtagger_filtered_out','MG_35_bmtagger_filtered_out','MG_3_bmtagger_filtered_out','MG_5_bmtagger_filtered_out','MG_6_bmtagger_filtered_out')
pathpos_control<-c('B101_bmtagger_filtered_out','B147_bmtagger_filtered_out','B259_bmtagger_filtered_out','B312_bmtagger_filtered_out','B329_bmtagger_filtered','B46_bmtagger_filtered_out','C26_bmtagger_filtered_out','C46_bmtagger_filtered_out','C71_bmtagger_filtered_out','E119_bmtagger_filtered_out','E130_bmtagger_filtered_out','E132_bmtagger_filtered_out','E167_bmtagger_filtered2_out','E55_bmtagger_filtered_out','E70_bmtagger_filtered_out','MG_40_CoupledReads_filtered','MG_41_CoupledReads_filtered','MG_42_CoupledReads_filtered','MG_45_CoupledReads_filtered','MG_46_CoupledReads_filtered','MG_47_CoupledReads_filtered','MG_48_CoupledReads_filtered','MG_51_CoupledReads_filtered','Q199_bmtagger_filtered_out','Q312_bmtagger_filtered_out','R119_bmtagger_filtered_out','R55_bmtagger_filtered_out','R85_bmtagger_filtered_out','R8_bmtagger_filtered_out')
pathneg_case<-c('MG_14_bmtagger_filtered_out','MG_19_bmtagger_filtered_out','MG_20_bmtagger_filtered_out','MG_28_bmtagger_filtered_out','MG_36_bmtagger_filtered_out','MG_55_CoupledReads_filtered','MG_57_bmtagger_filtered_out','MG_58_bmtagger_filtered_out','MG_59_CoupledReads_filtered','MG_76_CoupledReads_filtered','MG_77_CoupledReads_filtered','MG_78_CoupledReads_filtered','MG_79_CoupledReads_filtered','MG_80_CoupledReads_filtered','MG_81_CoupledReads_filtered','MG_82_CoupledReads_filtered','MG_83_CoupledReads_filtered','MG_84_CoupledReads_filtered','MG_85_CoupledReads_filtered','MG_86_CoupledReads_filtered','MG_87_Coupled_Reads_filtered','MG_88_Coupled_Reads_filtered')
pathneg_control<-c('B293_bmtagger_filtered_out','E82_bmtagger_filtered_out','MG_2_CoupledReads_filtered','MG_37_CoupledReads_filtered','MG_38_CoupledReads_filtered','MG_39_CoupledReads_filtered','MG_43_CoupledReads_filtered','MG_44_CoupledReads_filtered','MG_49_CoupledReads_filtered','MG_50','MG_52','MG_60_CoupledReads_filtered','MG_61_CoupledReads_filtered','MG_62_CoupledReads_filtered','MG_63_CoupledReads_filtered','MG_64_CoupledReads_filtered','MG_65_CoupledReads_filtered','MG_66_CoupledReads_filtered','MG_67_CoupledReads_filtered','MG_68_CoupledReads_filtered','MG_69_CoupledReads_filtered','MG_70_CoupledReads_filtered','MG_71_CoupledReads_filtered','MG_72_CoupledReads_filtered','MG_73_CoupledReads_filtered','MG_74_CoupledReads_filtered','MG_75_CoupledReads_filtered','R84_bmtagger_filtered_out')

#VFDB relative abundance matrix  
VFDB_tab_trans_pathpos<-VFDB_tab_trans[,pathpos]
VFDB_tab_trans_pathneg<-VFDB_tab_trans[,pathneg]

VFDB_tab_trans_case<-VFDB_tab_trans[,case]
VFDB_tab_trans_cont<-VFDB_tab_trans[,control]

#Kruskal-Wallis testing by DEC infection and diarrhea status (sample group)

#reformat data
VFDB_tab_trans_t<-as.data.frame(VFDB_tab_trans_t)
clin<-meta$Clinical_description 
path<-meta$Pathotype 
names<-meta$metagenome_read_file

VFDB_tab_trans_meta<-cbind(clin, VFDB_tab_trans_t) 
VFDB_tab_trans_meta<-cbind(path, VFDB_tab_trans_meta) 
VFDB_tab_trans_meta<-cbind(names, VFDB_tab_trans_meta) 

VFDB_meta_melt<-VFDB_tab_trans_meta %>%
  melt(measure.vars=c("names")) %>%
  relocate(path, .after=last_col())%>%
  relocate(clin, .after=last_col())

VFDB_meta_melt$cc_path<-str_c(VFDB_meta_melt$cc, "_", VFDB_meta_melt$path)

KW_raw_pvalue <- numeric(length = length(1:303))
for (i in (1:303)) {
  KW_raw_pvalue[i] <- wilcox.test(VFDB_meta_melt[, i] ~ VFDB_meta_melt$cc_path,
     )$p.value}

KW_df <- data.frame(Variable = names(VFDB_meta_melt[, 1:303]), KW_raw_pvalue = round(KW_raw_pvalue, 8))
KW_df$BH <-p.adjust(KW_df$KW_raw_pvalue,method = "BH")

KW_sig_genes<-subset(KW_df, BH<=0.05)
kbl(KW_sig_genes) %>%
  kable_paper(bootstrap_options = "striped", font_size=10)
Variable KW_raw_pvalue BH
18 VFG000929(gb|NP_752599)(entD)phosphopantetheinyl_transferase_component_of_enterobactin_synthase_multienzyme_complex[Enterobactin_(VF0228)][Escherichia_coli_CFT073] 0.0022105 0.0216060
20 VFG044165(gb|NP_752609)_(entS)enterobactin_exporter,iron-regulated[enterobactin_(IA019)][Escherichia_coli_CFT073] 0.0055774 0.0349131
22 VFG000889(gb|NP_755459)_(papF)_P_pilus_minor_subunit_PapF__[P_fimbriae_(VF0220)]_[Escherichia_coli_CFT073] 0.0055581 0.0349131
24 VFG013064(gb|YP_405019)(shuA)outer_membrane_heme/hemoglobin_receptor_ShuA[Shu_(VF0256)][Shigella_dysenteriae_Sd197] 0.0073514 0.0404995
32 VFG042435(gb|CAA62863)(afaE-V)AfaE-V_adhesin[Afimbrial_adhesin,AFA-V(AI013)][Escherichia_coli_str._AL_851] 0.0105693 0.0484404
35 VFG000945(gb|AAK16478)_(draD)DraD,periplasmic_protein[Dr_adhesins_(VF0223)][Escherichia_coli_O75:K5:H-_str._IH11128] 0.0004852 0.0087034
41 VFG002054(gb|YP_404607)(gspK)general_secretion_pathway_protein_K[T2SS_(VF0333)][Shigella_dysenteriae_Sd197] 0.0057612 0.0349131
46 VFG000920(gb|NP_756177)(chuX)putative_heme-binding_protein_ChuX[Chu_(VF0227)][Escherichia_coli_CFT073] 0.0010608 0.0122349
49 VFG050160(gb|YP_002392932.1)(kpsU)3-deoxy-manno-octulosonate_cytidylyltransferase[K1_capsule_(VF0239)][Escherichia_coli_O45:K1:H7_str._S88] 0.0000081 0.0003515
57 VFG042446(gb|ABU51870)_(daaC)_DaaC_usher_protein__[F1845_fimbrial_adhesin_(AI016)]_[Escherichia_coli_str._C1845] 0.0015060 0.0152107
61 VFG000944(gb|AAK16477)(draC)usher_protein_DraC[Dr_adhesins_(VF0223)][Escherichia_coli_O75:K5:H-_str._IH11128] 0.0000077 0.0003515
68 VFG042423(gb|CAW30797)(afaA)fimbrial_major_subunit_AfaA[Afimbrial_adhesin,_AFA-I,mannose-resistant_adhesin(AI010)][Escherichia_coli_O25b:H4_str._FV9863] 0.0003108 0.0062790
71 VFG000943(gb|AAK16476)(draB)chaperone_protein_DraB[Dr_adhesins_(VF0223)][Escherichia_coli_O75:K5:H-_str._IH11128] 0.0000144 0.0004838
73 VFG042425(gb|CAW30799)(afaC-I)usher_AfaC[Afimbrial_adhesin,_AFA-I,mannose-resistant_adhesin(AI010)][Escherichia_coli_O25b:H4_str._FV9863] 0.0000011 0.0003060
74 VFG000946(gb|AAK16479)(draP)DraP[Dr_adhesins_(VF0223)][Escherichia_coli_O75:K5:H-_str._IH11128] 0.0000820 0.0022579
78 VFG000918(gb|NP_756175)(chuT)periplasmic_heme-binding_protein_ChuT[Chu_(VF0227)][Escherichia_coli_CFT073] 0.0043422 0.0320899
84 VFG002047(gb|YP_404600)(gspD)general_secretion_pathway_protein_D[T2SS_(VF0333)][Shigella_dysenteriae_Sd197] 0.0096514 0.0449903
92 VFG002415(gb|NP_286008)(yagX/ecpC)E._coli_common_pilus_usher_EcpC[ECP_(VF0404)][Escherichia_coli_O157:H7_str._EDL933] 0.0087174 0.0431867
97 VFG050157(gb|YP_002392929.1)(kpsF)Polysialic_acid_capsule_expression_protein[K1_capsule_(VF0239)][Escherichia_coli_O45:K1:H7_str._S88] 0.0000071 0.0003515
99 VFG000921(gb|NP_756178)(chuY)ChuY[Chu_(VF0227)][Escherichia_coli_CFT073] 0.0006166 0.0088971
100 VFG000942(gb|AAK16475)(draA)cytoplamic_protein_DraA[Dr_adhesins_(VF0223)][Escherichia_coli_O75:K5:H-_str._IH11128] 0.0000073 0.0003515
105 VFG002053(gb|YP_404606)(gspJ)general_secretion_pathway_protein_J[T2SS_(VF0333)][Shigella_dysenteriae_Sd197] 0.0091494 0.0433167
107 VFG000932(gb|NP_752612)(entE)2,3-dihydroxybenzoate-AMP_ligase_component_of_enterobactin_synthase_multienzyme_complex[Enterobactin_(VF0228)][Escherichia_coli_CFT073] 0.0039232 0.0312824
110 VFG002416(gb|NP_286007)(yagW/ecpD)polymerized_tip_adhesin_of_ECP_fibers[ECP_(VF0404)][Escherichia_coli_O157:H7_str._EDL933] 0.0079314 0.0412283
112 VFG000881_(papB)regulatory_protein_PapB[P_fimbriae_(VF0220)]_[Escherichia_coli_CFT073] 0.0040843 0.0317320
118 VFG000361(gb|NP_405470)(ybtU)yersiniabactin_biosynthetic_protein_YbtU[Yersiniabactin_(VF0136)][Yersinia_pestis_CO92] 0.0082405 0.0416147
121 VFG002037(gb|AAA24685)(eltA)heat-labile_enterotoxin_A_prepeptide(from_human)[Heat-labile_toxin_(LT)_(VF0210)]_[Escherichia_coli] 0.0005716 0.0087034
124 VFG000925(gb|NP_752606)(fepC)ferrienterobactin_ABC_transporter_ATPase[Enterobactin_(VF0228)][Escherichia_coli_CFT073] 0.0038216 0.0312824
125 VFG034908(gb|NP_286160)(espY3)Type_III_secretion_system_effector_EspY3[TTSS_(VF0191)][Escherichia_coli_O157:H7_str._EDL933] 0.0077693 0.0412283
131 VFG002414(gb|NP_286010)(yagZ/ecpA)E._coli_common_pilus_structural_subunit_EcpA[ECP_(VF0404)][Escherichia_coli_O157:H7_str._EDL933] 0.0005745 0.0087034
132 VFG042552(gb|YP_002295985)(faeD)fimbrial_usher_protein_FaeD[K88_pili/F4_fimbriae_(AI040)][Escherichia_coli_SE11] 0.0031415 0.0271968
146 VFG002038(gb|AAA98064)(eltB)enterotoxin_subunit_B(from_human)[Heat-labile_toxin_(LT)_(VF0210)]_[Escherichia_coli] 0.0005282 0.0087034
152 VFG034815(gb|NP_287686)(espR1)Type_III_secretion_system_effector_espR1[TTSS_(VF0191)][Escherichia_coli_O157:H7_str._EDL933] 0.0107112 0.0484404
167 VFG002052(gb|YP_404605)(gspI)general_secretion_pathway_protein_I[T2SS_(VF0333)][Shigella_dysenteriae_Sd197] 0.0054245 0.0349131
169 VFG000919(gb|NP_756176)(chuW)Putative_oxygen_independent_coproporphyrinogen_III_oxidase[Chu_(VF0227)][Escherichia_coli_CFT073] 0.0050615 0.0340808
173 VFG002412(gb|NP_286009)(yagY/ecpB)E._coli_common_pilus_chaperone_EcpB[ECP_(VF0404)][Escherichia_coli_O157:H7_str._EDL933] 0.0028251 0.0259396
174 VFG000917(gb|NP_756170)(chuA)Outer_membrane_heme/hemoglobin_receptor_ChuA[Chu_(VF0227)][Escherichia_coli_CFT073] 0.0007126 0.0093880
176 VFG002417(gb|NP_286006)(yagV/ecpE)E._coli_common_pilus_chaperone_EcpE[ECP_(VF0404)][Escherichia_coli_O157:H7_str._EDL933] 0.0070613 0.0403691
181 VFG048621(gb|YP_001688090.1)(iutA)ferric_aerobactin_receptor_IutA[Aerobactin_(VF0565)][Klebsiella_pneumoniae_subsp._pneumoniae_NTUH-K2044] 0.0050262 0.0340808
190 VFG050161(gb|YP_002392933.1)(kpsC)Capsule_polysaccharide_export_protein[K1_capsule_(VF0239)][Escherichia_coli_O45:K1:H7_str._S88] 0.0000349 0.0010566
195 VFG001444(gb|AAG10151)(aslA)putative_arylsulfatase[AslA_(VF0238)][Escherichia_coli_O18:K1:H7_str._RS218] 0.0006746 0.0092911
196 VFG000863(gb|BAA94855)(astA)heat-stable_enterotoxin_1[EAST1_(VF0216)][Escherichia_coli_O44:H18_042] 0.0077731 0.0412283
200 VFG050162(gb|YP_002392934.1)(kpsS)Capsule_polysaccharide_export_protein[K1_capsule_(VF0239)][Escherichia_coli_O45:K1:H7_str._S88] 0.0002489 0.0053876
203 VFG050158(gb|YP_002392930.1)(kpsE)Capsule_polysaccharide_export_inner-membrane_protein[K1_capsule_(VF0239)][Escherichia_coli_O45:K1:H7_str._S88] 0.0000039 0.0003515
211 VFG002050(gb|YP_404603)(gspG)general_secretion_pathway_protein_G[T2SS_(VF0333)][Shigella_dysenteriae_Sd197] 0.0062134 0.0362048
212 VFG000618(gb|NP_709457)_(iucD)aerobactin_synthesis_protein_IucC,lysine_6-monooxygenase[Aerobactin_(VF0123)][Shigella_flexneri_2a_str._301] 0.0089230 0.0431867
214 VFG002046(gb|YP_404599)(gspC)general_secretion_pathway_protein_C[T2SS_(VF0333)][Shigella_dysenteriae_Sd197] 0.0001382 0.0032223
225 VFG000616(gb|NP_709455)(iucB)aerobactin_synthesis_protein_IucB[Aerobactin_(VF0123)][Shigella_flexneri_2a_str._301] 0.0031005 0.0271968
230 VFG000926(gb|NP_752608)(fepD)ferrienterobactin_ABC_transporter_permease[Enterobactin_(VF0228)][Escherichia_coli_CFT073] 0.0043383 0.0320899
231 VFG042447(gb|AAA23662)(daaF)DaaF_protein[F1845_fimbrial_adhesin_(AI016)][Escherichia_coli_str._C1845] 0.0001382 0.0032223
244 VFG002051(gb|YP_404604)(gspH)general_secretion_pathway_protein_H[T2SS_(VF0333)][Shigella_dysenteriae_Sd197] 0.0056927 0.0349131
248 VFG000669(gb|NP_706257)(gtrA)bactoprenol-linked_glucose_translocase/flippase[LPS_(VF0124)][Shigella_flexneri_2a_str._301] 0.0047421 0.0340808
251 VFG042424(gb|CAW30798)(afaB-I)chaperone_AfaB[Afimbrial_adhesin,_AFA-I,mannose-resistant_adhesin(AI010)][Escherichia_coli_O25b:H4_str._FV9863] 0.0000020 0.0003060
256 VFG000902(gb|NP_755494)(sat)Aecreted_auto_transpoter_toxin[Sat_(VF0231)][Escherichia_coli_CFT073] 0.0059818 0.0355388
258 VFG050169(gb|YP_002392941.1)(kpsM)Polysialic_acid_transport_protein[K1_capsule_(VF0239)][Escherichia_coli_O45:K1:H7_str._S88] 0.0011392 0.0123273
259 VFG000880(gb|NP_755468)(papI)regulatory_protein_PapI[P_fimbriae_(VF0220)][Escherichia_coli_CFT073] 0.0080280 0.0412283
263 VFG034772(gb|NP_290644)(espL4)Type_III_secretion_system_effector_EspL4[TTSS_(VF0191)][Escherichia_coli_O157:H7_str._EDL933] 0.0039232 0.0312824
264 VFG034866(gb|NP_290672)(espX4)Type_III_secretion_system_effector_EspX4[TTSS_(VF0191)][Escherichia_coli_O157:H7_str._EDL933] 0.0072128 0.0404717
266 VFG000938(gb|NP_755500)(iucC)aerobactin_siderophore_biosynthesis_protein_IucC[Aerobactin_(VF0229)][Escherichia_coli_CFT073] 0.0089794 0.0431867
270 VFG034873(gb|NP_290699)(espX5)Type_III_secretion_system_effector_EspX5[TTSS_(VF0191)][Escherichia_coli_O157:H7_str._EDL933] 0.0048943 0.0340808
276 VFG000617(gb|NP_709456)(iucC)aerobactin_synthesis_protein_IucC[Aerobactin_(VF0123)][Shigella_flexneri_2a_str._301] 0.0010902 0.0122349
278 VFG000916(gb|NP_756169)(chuS)heme_oxygenase_ChuS[Chu_(VF0227)][Escherichia_coli_CFT073] 0.0005611 0.0087034
285 VFG050159(gb|YP_002392931.1)(kpsD)Polysialic_acid_transport_protein_precursor[K1_capsule_(VF0239)][Escherichia_coli_O45:K1:H7_str._S88] 0.0000102 0.0003867
287 VFG034895(gb|NP_285753)(espY1)Type_III_secretion_system_effector_EspY1[TTSS_(VF0191)][Escherichia_coli_O157:H7_str._EDL933] 0.0014563 0.0152107
293 VFG000904(gb|NP_752330)(vat)Haemoglobin_protease[Tsh_(VF0233)][Escherichia_coli_CFT073] 0.0008410 0.0106183
298 VFG000924(gb|NP_752610)(fepB)ferrienterobactin_ABC_transporter_periplasmic_binding_protein[Enterobactin_(VF0228)][Escherichia_coli_CFT073] 0.0028008 0.0259396
299 VFG002413(gb|NP_286011)(ykgK/ecpR)regulator_protein_EcpR[ECP_(VF0404)][Escherichia_coli_O157:H7_str._EDL933] 0.0009070 0.0109924
#for only E. coli genes  
EC_KW_df<-KW_df %>% filter(str_detect(Variable, "Escherichia_coli"))
EC_KW_df$BH_Ec <-p.adjust(EC_KW_df$KW_raw_pvalue,method = "BH")

Ec_KW_sig_genes<-subset(EC_KW_df, BH_Ec<=0.05)
kbl(Ec_KW_sig_genes) %>%
  kable_paper(bootstrap_options = "striped", font_size=10)
Variable KW_raw_pvalue BH BH_Ec
3 VFG000929(gb|NP_752599)(entD)phosphopantetheinyl_transferase_component_of_enterobactin_synthase_multienzyme_complex[Enterobactin_(VF0228)][Escherichia_coli_CFT073] 0.0022105 0.0216060 0.0096043
4 VFG044165(gb|NP_752609)_(entS)enterobactin_exporter,iron-regulated[enterobactin_(IA019)][Escherichia_coli_CFT073] 0.0055774 0.0349131 0.0167320
5 VFG000889(gb|NP_755459)_(papF)_P_pilus_minor_subunit_PapF__[P_fimbriae_(VF0220)]_[Escherichia_coli_CFT073] 0.0055581 0.0349131 0.0167320
8 VFG042435(gb|CAA62863)(afaE-V)AfaE-V_adhesin[Afimbrial_adhesin,AFA-V(AI013)][Escherichia_coli_str._AL_851] 0.0105693 0.0484404 0.0254645
10 VFG000945(gb|AAK16478)_(draD)DraD,periplasmic_protein[Dr_adhesins_(VF0223)][Escherichia_coli_O75:K5:H-_str._IH11128] 0.0004852 0.0087034 0.0038097
13 VFG000920(gb|NP_756177)(chuX)putative_heme-binding_protein_ChuX[Chu_(VF0227)][Escherichia_coli_CFT073] 0.0010608 0.0122349 0.0053467
14 VFG050160(gb|YP_002392932.1)(kpsU)3-deoxy-manno-octulosonate_cytidylyltransferase[K1_capsule_(VF0239)][Escherichia_coli_O45:K1:H7_str._S88] 0.0000081 0.0003515 0.0001462
15 VFG044172(gb|NP_756180)(chuV)ATP-binding_hydrophilic_protein_ChuV[Chu_(VF0227)][Escherichia_coli_CFT073] 0.0186793 0.0681907 0.0362091
17 VFG042558(gb|YP_002295991)(faeJ)fimbrial_protein_FaeJ[K88_pili/F4_fimbriae_(AI040)][Escherichia_coli_SE11] 0.0180945 0.0668614 0.0356236
18 VFG042446(gb|ABU51870)_(daaC)_DaaC_usher_protein__[F1845_fimbrial_adhesin_(AI016)]_[Escherichia_coli_str._C1845] 0.0015060 0.0152107 0.0067770
19 VFG000944(gb|AAK16477)(draC)usher_protein_DraC[Dr_adhesins_(VF0223)][Escherichia_coli_O75:K5:H-_str._IH11128] 0.0000077 0.0003515 0.0001462
20 VFG042423(gb|CAW30797)(afaA)fimbrial_major_subunit_AfaA[Afimbrial_adhesin,_AFA-I,mannose-resistant_adhesin(AI010)][Escherichia_coli_O25b:H4_str._FV9863] 0.0003108 0.0062790 0.0027976
21 VFG045349(gb|YP_002390132)(fdeC)adhesin_FdeC[FdeC_(VF0506)][Escherichia_coli_O45:K1:H7_str._S88] 0.0114127 0.0508538 0.0266297
22 VFG000943(gb|AAK16476)(draB)chaperone_protein_DraB[Dr_adhesins_(VF0223)][Escherichia_coli_O75:K5:H-_str._IH11128] 0.0000144 0.0004838 0.0002012
23 VFG000930(gb|NP_752604)_(entF)enterobactin_synthase_multienzyme_complex_component,ATP-dependent[Enterobactin_(VF0228)][Escherichia_coli_CFT073] 0.0127811 0.0518498 0.0274084
24 VFG042425(gb|CAW30799)(afaC-I)usher_AfaC[Afimbrial_adhesin,_AFA-I,mannose-resistant_adhesin(AI010)][Escherichia_coli_O25b:H4_str._FV9863] 0.0000011 0.0003060 0.0001273
25 VFG000946(gb|AAK16479)(draP)DraP[Dr_adhesins_(VF0223)][Escherichia_coli_O75:K5:H-_str._IH11128] 0.0000820 0.0022579 0.0009389
26 VFG000918(gb|NP_756175)(chuT)periplasmic_heme-binding_protein_ChuT[Chu_(VF0227)][Escherichia_coli_CFT073] 0.0043422 0.0320899 0.0143978
28 VFG002415(gb|NP_286008)(yagX/ecpC)E._coli_common_pilus_usher_EcpC[ECP_(VF0404)][Escherichia_coli_O157:H7_str._EDL933] 0.0087174 0.0431867 0.0219677
29 VFG050157(gb|YP_002392929.1)(kpsF)Polysialic_acid_capsule_expression_protein[K1_capsule_(VF0239)][Escherichia_coli_O45:K1:H7_str._S88] 0.0000071 0.0003515 0.0001462
31 VFG000921(gb|NP_756178)(chuY)ChuY[Chu_(VF0227)][Escherichia_coli_CFT073] 0.0006166 0.0088971 0.0038848
32 VFG000942(gb|AAK16475)(draA)cytoplamic_protein_DraA[Dr_adhesins_(VF0223)][Escherichia_coli_O75:K5:H-_str._IH11128] 0.0000073 0.0003515 0.0001462
35 VFG000932(gb|NP_752612)(entE)2,3-dihydroxybenzoate-AMP_ligase_component_of_enterobactin_synthase_multienzyme_complex[Enterobactin_(VF0228)][Escherichia_coli_CFT073] 0.0039232 0.0312824 0.0141236
38 VFG002416(gb|NP_286007)(yagW/ecpD)polymerized_tip_adhesin_of_ECP_fibers[ECP_(VF0404)][Escherichia_coli_O157:H7_str._EDL933] 0.0079314 0.0412283 0.0206433
40 VFG000881_(papB)regulatory_protein_PapB[P_fimbriae_(VF0220)]_[Escherichia_coli_CFT073] 0.0040843 0.0317320 0.0142951
42 VFG002037(gb|AAA24685)(eltA)heat-labile_enterotoxin_A_prepeptide(from_human)[Heat-labile_toxin_(LT)_(VF0210)]_[Escherichia_coli] 0.0005716 0.0087034 0.0038097
44 VFG000925(gb|NP_752606)(fepC)ferrienterobactin_ABC_transporter_ATPase[Enterobactin_(VF0228)][Escherichia_coli_CFT073] 0.0038216 0.0312824 0.0141236
45 VFG034908(gb|NP_286160)(espY3)Type_III_secretion_system_effector_EspY3[TTSS_(VF0191)][Escherichia_coli_O157:H7_str._EDL933] 0.0077693 0.0412283 0.0206433
49 VFG002414(gb|NP_286010)(yagZ/ecpA)E._coli_common_pilus_structural_subunit_EcpA[ECP_(VF0404)][Escherichia_coli_O157:H7_str._EDL933] 0.0005745 0.0087034 0.0038097
50 VFG042552(gb|YP_002295985)(faeD)fimbrial_usher_protein_FaeD[K88_pili/F4_fimbriae_(AI040)][Escherichia_coli_SE11] 0.0031415 0.0271968 0.0123698
57 VFG002038(gb|AAA98064)(eltB)enterotoxin_subunit_B(from_human)[Heat-labile_toxin_(LT)_(VF0210)]_[Escherichia_coli] 0.0005282 0.0087034 0.0038097
61 VFG034815(gb|NP_287686)(espR1)Type_III_secretion_system_effector_espR1[TTSS_(VF0191)][Escherichia_coli_O157:H7_str._EDL933] 0.0107112 0.0484404 0.0254645
65 VFG000937(gb|NP_755499)(iucD)L-lysine_6-monooxygenase_IucD[Aerobactin_(VF0229)][Escherichia_coli_CFT073] 0.0128341 0.0518498 0.0274084
66 VFG000919(gb|NP_756176)(chuW)Putative_oxygen_independent_coproporphyrinogen_III_oxidase[Chu_(VF0227)][Escherichia_coli_CFT073] 0.0050615 0.0340808 0.0159437
69 VFG002412(gb|NP_286009)(yagY/ecpB)E._coli_common_pilus_chaperone_EcpB[ECP_(VF0404)][Escherichia_coli_O157:H7_str._EDL933] 0.0028251 0.0259396 0.0114827
70 VFG000917(gb|NP_756170)(chuA)Outer_membrane_heme/hemoglobin_receptor_ChuA[Chu_(VF0227)][Escherichia_coli_CFT073] 0.0007126 0.0093880 0.0040814
71 VFG002417(gb|NP_286006)(yagV/ecpE)E._coli_common_pilus_chaperone_EcpE[ECP_(VF0404)][Escherichia_coli_O157:H7_str._EDL933] 0.0070613 0.0403691 0.0201958
74 VFG034849(gb|NP_285716)(espX1)Type_III_secretion_system_effector_EspX1[TTSS_(VF0191)][Escherichia_coli_O157:H7_str._EDL933] 0.0125060 0.0518498 0.0274084
75 VFG050161(gb|YP_002392933.1)(kpsC)Capsule_polysaccharide_export_protein[K1_capsule_(VF0239)][Escherichia_coli_O45:K1:H7_str._S88] 0.0000349 0.0010566 0.0004394
78 VFG001444(gb|AAG10151)(aslA)putative_arylsulfatase[AslA_(VF0238)][Escherichia_coli_O18:K1:H7_str._RS218] 0.0006746 0.0092911 0.0040476
79 VFG000863(gb|BAA94855)(astA)heat-stable_enterotoxin_1[EAST1_(VF0216)][Escherichia_coli_O44:H18_042] 0.0077731 0.0412283 0.0206433
80 VFG000923(gb|NP_752600)(fepA)ferrienterobactin_outer_membrane_transporter[Enterobactin_(VF0228)][Escherichia_coli_CFT073] 0.0167691 0.0635128 0.0335381
81 VFG001713(gb|NP_755457)(papX)PapX_protein_regulates_flagellum_synthesis_to_repress_motility[P_fimbriae_(CVF425)][Escherichia_coli_CFT073] 0.0128120 0.0518498 0.0274084
82 VFG050162(gb|YP_002392934.1)(kpsS)Capsule_polysaccharide_export_protein[K1_capsule_(VF0239)][Escherichia_coli_O45:K1:H7_str._S88] 0.0002489 0.0053876 0.0024127
83 VFG050158(gb|YP_002392930.1)(kpsE)Capsule_polysaccharide_export_inner-membrane_protein[K1_capsule_(VF0239)][Escherichia_coli_O45:K1:H7_str._S88] 0.0000039 0.0003515 0.0001462
84 VFG000878(gb|NP_757247)(fimG)FimG_protein_precursor[Type_1_fimbriae_(VF0221)][Escherichia_coli_CFT073] 0.0125343 0.0518498 0.0274084
88 VFG000931(gb|NP_752611)(entC)isochorismate_synthase_1[Enterobactin_(VF0228)][Escherichia_coli_CFT073] 0.0199570 0.0719878 0.0380998
91 VFG000926(gb|NP_752608)(fepD)ferrienterobactin_ABC_transporter_permease[Enterobactin_(VF0228)][Escherichia_coli_CFT073] 0.0043383 0.0320899 0.0143978
92 VFG042447(gb|AAA23662)(daaF)DaaF_protein[F1845_fimbrial_adhesin_(AI016)][Escherichia_coli_str._C1845] 0.0001382 0.0032223 0.0014516
96 VFG042424(gb|CAW30798)(afaB-I)chaperone_AfaB[Afimbrial_adhesin,_AFA-I,mannose-resistant_adhesin(AI010)][Escherichia_coli_O25b:H4_str._FV9863] 0.0000020 0.0003060 0.0001273
100 VFG000902(gb|NP_755494)(sat)Aecreted_auto_transpoter_toxin[Sat_(VF0231)][Escherichia_coli_CFT073] 0.0059818 0.0355388 0.0175280
101 VFG050169(gb|YP_002392941.1)(kpsM)Polysialic_acid_transport_protein[K1_capsule_(VF0239)][Escherichia_coli_O45:K1:H7_str._S88] 0.0011392 0.0123273 0.0055205
102 VFG000880(gb|NP_755468)(papI)regulatory_protein_PapI[P_fimbriae_(VF0220)][Escherichia_coli_CFT073] 0.0080280 0.0412283 0.0206433
104 VFG034772(gb|NP_290644)(espL4)Type_III_secretion_system_effector_EspL4[TTSS_(VF0191)][Escherichia_coli_O157:H7_str._EDL933] 0.0039232 0.0312824 0.0141236
105 VFG034866(gb|NP_290672)(espX4)Type_III_secretion_system_effector_EspX4[TTSS_(VF0191)][Escherichia_coli_O157:H7_str._EDL933] 0.0072128 0.0404717 0.0201958
106 VFG000938(gb|NP_755500)(iucC)aerobactin_siderophore_biosynthesis_protein_IucC[Aerobactin_(VF0229)][Escherichia_coli_CFT073] 0.0089794 0.0431867 0.0221844
109 VFG034873(gb|NP_290699)(espX5)Type_III_secretion_system_effector_EspX5[TTSS_(VF0191)][Escherichia_coli_O157:H7_str._EDL933] 0.0048943 0.0340808 0.0158124
112 VFG000916(gb|NP_756169)(chuS)heme_oxygenase_ChuS[Chu_(VF0227)][Escherichia_coli_CFT073] 0.0005611 0.0087034 0.0038097
113 VFG000933(gb|NP_752613)(entB)isochorismatase[Enterobactin_(VF0228)][Escherichia_coli_CFT073] 0.0151543 0.0588685 0.0313023
115 VFG042459(gb|AAC41418)(cfaD’)colonisation_factor_antigen_d’[Colonization_factor_antigen_I_fimbriae_(CFA/I)_(AI018)][Escherichia_coli_str._E7473] 0.0140936 0.0554592 0.0295965
116 VFG050159(gb|YP_002392931.1)(kpsD)Polysialic_acid_transport_protein_precursor[K1_capsule_(VF0239)][Escherichia_coli_O45:K1:H7_str._S88] 0.0000102 0.0003867 0.0001608
118 VFG034895(gb|NP_285753)(espY1)Type_III_secretion_system_effector_EspY1[TTSS_(VF0191)][Escherichia_coli_O157:H7_str._EDL933] 0.0014563 0.0152107 0.0067770
121 VFG000904(gb|NP_752330)(vat)Haemoglobin_protease[Tsh_(VF0233)][Escherichia_coli_CFT073] 0.0008410 0.0106183 0.0046075
123 VFG000877(gb|NP_757245)(fimF)FimF_protein_precursor[Type_1_fimbriae_(VF0221)][Escherichia_coli_CFT073] 0.0155699 0.0597175 0.0316421
124 VFG000924(gb|NP_752610)(fepB)ferrienterobactin_ABC_transporter_periplasmic_binding_protein[Enterobactin_(VF0228)][Escherichia_coli_CFT073] 0.0028008 0.0259396 0.0114827
125 VFG002413(gb|NP_286011)(ykgK/ecpR)regulator_protein_EcpR[ECP_(VF0404)][Escherichia_coli_O157:H7_str._EDL933] 0.0009070 0.0109924 0.0047615

#format data for plots

#format data
KW_sig_genes<-subset(VFDB_tab_trans, rownames(VFDB_tab_trans) %in% KW_sig_genes$Variable)
Ec_KW_sig_genes<-subset(VFDB_tab_trans, rownames(VFDB_tab_trans) %in% Ec_KW_sig_genes$Variable)

KW_sig_genes_mat<-data.matrix(KW_sig_genes)
Ec_KW_sig_genes_mat<-data.matrix(Ec_KW_sig_genes)

#calculate relative abundances
KW_sig_genes_pathpos_case<-KW_sig_genes[,pathpos_case]
KW_sig_genes_pathpos_control<-KW_sig_genes[,pathpos_control]
KW_sig_genes_pathneg_case<-KW_sig_genes[,pathneg_case]
KW_sig_genes_pathneg_control<-KW_sig_genes[,pathneg_control]
Ec_KW_sig_genes_pathpos_case<-Ec_KW_sig_genes[,pathpos_case]
Ec_KW_sig_genes_pathpos_control<-Ec_KW_sig_genes[,pathpos_control]
Ec_KW_sig_genes_pathneg_case<-Ec_KW_sig_genes[,pathneg_case]
Ec_KW_sig_genes_pathneg_control<-Ec_KW_sig_genes[,pathneg_control]

mean_KW_sig_genes_pathpos_case<-rowMeans(KW_sig_genes_pathpos_case)
mean_KW_sig_genes_pathpos_control<-rowMeans(KW_sig_genes_pathpos_control)
mean_KW_sig_genes_pathneg_case<-rowMeans(KW_sig_genes_pathneg_case)
mean_KW_sig_genes_pathneg_control<-rowMeans(KW_sig_genes_pathneg_control)
mean_Ec_KW_sig_genes_pathpos_case<-rowMeans(Ec_KW_sig_genes_pathpos_case)
mean_Ec_KW_sig_genes_pathpos_control<-rowMeans(Ec_KW_sig_genes_pathpos_control)
mean_Ec_KW_sig_genes_pathneg_case<-rowMeans(Ec_KW_sig_genes_pathneg_case)
mean_Ec_KW_sig_genes_pathneg_control<-rowMeans(Ec_KW_sig_genes_pathneg_control)

mean_KW<-data.frame(pathpos_case=round(mean_KW_sig_genes_pathpos_case, 5),
                                pathpos_control=round(mean_KW_sig_genes_pathpos_control, 5),
                                pathneg_case=round(mean_KW_sig_genes_pathneg_case, 5), 
                                pathneg_control=round(mean_KW_sig_genes_pathneg_control, 5))
mean_KW_mat<-data.matrix(mean_KW)

mean_Ec_KW<-data.frame(pathpos_case=round(mean_Ec_KW_sig_genes_pathpos_case, 5),
                                pathpos_control=round(mean_Ec_KW_sig_genes_pathpos_control, 5),
                                pathneg_case=round(mean_Ec_KW_sig_genes_pathneg_case, 5), 
                                pathneg_control=round(mean_Ec_KW_sig_genes_pathneg_control, 5))
mean_Ec_KW_mat<-data.matrix(mean_Ec_KW)

#Comparison of total number of virulence genes detected by DEC infection and diarrhea status

#convert VFDB dataframe to presence/absence
VFDB_PA<-VFDB_tab_trans %>% 
  mutate_if(is.numeric, ~1 * (. !=0)) 
colnames(VFDB_PA)<-meta$sample_name
VFDB_PA<-t(VFDB_PA)


#new dataframes with rowsums
VFDB_rowsums<-data.frame(rowSums=rowSums(VFDB_PA), cc=meta$Clin, path=meta$Pathotype, cc_path=meta$Pathotype_Clinical)

VFDB_rowsums_case<-subset(VFDB_rowsums, cc=="case")
VFDB_rowsums_control<-subset(VFDB_rowsums, cc=="control")

VFDB_rowsums_pathpos<-subset(VFDB_rowsums, path=="pathotype+")
VFDB_rowsums_pathneg<-subset(VFDB_rowsums, path=="pathotype-")

VFDB_rowsums_pathpos_case<-subset(VFDB_rowsums, path=="pathotype+" & cc=="case")
VFDB_rowsums_pathpos_control<-subset(VFDB_rowsums, path=="pathotype+" & cc=="control")

VFDB_rowsums_pathneg_case<-subset(VFDB_rowsums, path=="pathotype-" & cc=="case")
VFDB_rowsums_pathneg_control<-subset(VFDB_rowsums, path=="pathotype-" & cc=="control")

#nonparametric pairwise tests
wilcox.test(VFDB_rowsums_case$rowSums, VFDB_rowsums_control$rowSums)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  VFDB_rowsums_case$rowSums and VFDB_rowsums_control$rowSums
## W = 1671.5, p-value = 0.03036
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(VFDB_rowsums_pathpos$rowSums, VFDB_rowsums_pathneg$rowSums)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  VFDB_rowsums_pathpos$rowSums and VFDB_rowsums_pathneg$rowSums
## W = 1724.5, p-value = 0.01496
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(VFDB_rowsums_pathpos_case$rowSums, VFDB_rowsums_pathpos_control$rowSums)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  VFDB_rowsums_pathpos_case$rowSums and VFDB_rowsums_pathpos_control$rowSums
## W = 497.5, p-value = 0.01962
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(VFDB_rowsums_pathneg_case$rowSums, VFDB_rowsums_pathneg_control$rowSums)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  VFDB_rowsums_pathneg_case$rowSums and VFDB_rowsums_pathneg_control$rowSums
## W = 344.5, p-value = 0.4816
## alternative hypothesis: true location shift is not equal to 0

#Compare total number of E. coli-annotated virulence genes detected by DEC infection and diarrhea status

#subset E. coli genes
EC_VFDB_PA<-VFDB_PA %>%
  as.data.frame() %>%
  select(+contains("Escherichia_coli")) %>%
  mutate_if(is.numeric, ~1 * (. !=0))

#new dataframes with rowsums
EC_VFDB_rowsums<-data.frame(rowSums=rowSums(EC_VFDB_PA), cc=meta$Clin, path=meta$Pathotype, cc_path=meta$Pathotype_Clinical)

EC_VFDB_rowsums_case<-subset(EC_VFDB_rowsums, cc=="case")
EC_VFDB_rowsums_control<-subset(EC_VFDB_rowsums, cc=="control")

EC_VFDB_rowsums_pathpos<-subset(EC_VFDB_rowsums, path=="pathotype+")
EC_VFDB_rowsums_pathneg<-subset(EC_VFDB_rowsums, path=="pathotype-")

EC_VFDB_rowsums_pathpos_case<-subset(EC_VFDB_rowsums, path=="pathotype+" & cc=="case")
EC_VFDB_rowsums_pathpos_control<-subset(EC_VFDB_rowsums, path=="pathotype+" & cc=="control")

EC_VFDB_rowsums_pathneg_case<-subset(EC_VFDB_rowsums, path=="pathotype-" & cc=="case")
EC_VFDB_rowsums_pathneg_control<-subset(EC_VFDB_rowsums, path=="pathotype-" & cc=="control")

#nonparametric pairwise tests
wilcox.test(EC_VFDB_rowsums_case$rowSums, EC_VFDB_rowsums_control$rowSums)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  EC_VFDB_rowsums_case$rowSums and EC_VFDB_rowsums_control$rowSums
## W = 1700.5, p-value = 0.01852
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(EC_VFDB_rowsums_pathpos$rowSums, EC_VFDB_rowsums_pathneg$rowSums)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  EC_VFDB_rowsums_pathpos$rowSums and EC_VFDB_rowsums_pathneg$rowSums
## W = 1944, p-value = 0.0001125
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(EC_VFDB_rowsums_pathpos_case$rowSums, EC_VFDB_rowsums_pathpos_control$rowSums)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  EC_VFDB_rowsums_pathpos_case$rowSums and EC_VFDB_rowsums_pathpos_control$rowSums
## W = 501.5, p-value = 0.01624
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(EC_VFDB_rowsums_pathneg_case$rowSums, EC_VFDB_rowsums_pathneg_control$rowSums)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  EC_VFDB_rowsums_pathneg_case$rowSums and EC_VFDB_rowsums_pathneg_control$rowSums
## W = 354, p-value = 0.3737
## alternative hypothesis: true location shift is not equal to 0

#Compare total number of Klebsiella-annotated virulence genes detected by DEC infection and diarrhea status

#subset Klebsiella genes
Kleb_VFDB_PA<-VFDB_PA %>%
  as.data.frame() %>%
  select(+contains("Klebsiella")) %>%
  mutate_if(is.numeric, ~1 * (. !=0))

#new dataframes with rowsums
Kleb_VFDB_rowsums<-data.frame(rowSums=rowSums(Kleb_VFDB_PA), cc=meta$Clin, path=meta$Pathotype, cc_path=meta$Pathotype_Clinical)

Kleb_VFDB_rowsums_case<-subset(Kleb_VFDB_rowsums, cc=="case")
Kleb_VFDB_rowsums_control<-subset(Kleb_VFDB_rowsums, cc=="control")

Kleb_VFDB_rowsums_pathpos<-subset(Kleb_VFDB_rowsums, path=="pathotype+")
Kleb_VFDB_rowsums_pathneg<-subset(Kleb_VFDB_rowsums, path=="pathotype-")

Kleb_VFDB_rowsums_pathpos_case<-subset(Kleb_VFDB_rowsums, path=="pathotype+" & cc=="case")
Kleb_VFDB_rowsums_pathpos_control<-subset(Kleb_VFDB_rowsums, path=="pathotype+" & cc=="control")

Kleb_VFDB_rowsums_pathneg_case<-subset(Kleb_VFDB_rowsums, path=="pathotype-" & cc=="case")
Kleb_VFDB_rowsums_pathneg_control<-subset(Kleb_VFDB_rowsums, path=="pathotype-" & cc=="control")

#nonparametric pairwise tests
wilcox.test(Kleb_VFDB_rowsums_case$rowSums, Kleb_VFDB_rowsums_control$rowSums)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Kleb_VFDB_rowsums_case$rowSums and Kleb_VFDB_rowsums_control$rowSums
## W = 1375, p-value = 0.8187
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(Kleb_VFDB_rowsums_pathpos$rowSums, Kleb_VFDB_rowsums_pathneg$rowSums)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Kleb_VFDB_rowsums_pathpos$rowSums and Kleb_VFDB_rowsums_pathneg$rowSums
## W = 1287, p-value = 0.6835
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(Kleb_VFDB_rowsums_pathpos_case$rowSums, Kleb_VFDB_rowsums_pathpos_control$rowSums)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Kleb_VFDB_rowsums_pathpos_case$rowSums and Kleb_VFDB_rowsums_pathpos_control$rowSums
## W = 389, p-value = 0.651
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(Kleb_VFDB_rowsums_pathneg_case$rowSums, Kleb_VFDB_rowsums_pathneg_control$rowSums)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Kleb_VFDB_rowsums_pathneg_case$rowSums and Kleb_VFDB_rowsums_pathneg_control$rowSums
## W = 289, p-value = 0.717
## alternative hypothesis: true location shift is not equal to 0

#plot of total virulence genes by DEC infection and diarrhea status

VFDB_rowsums$path <- factor(VFDB_rowsums$path, levels=c("pathotype+", "pathotype-"))

VFDB_path_box<-VFDB_rowsums %>% 
  tibble::rownames_to_column("Sample_ID") %>%
  ggplot(aes(fill=cc_path, y=rowSums, x=path))+
    geom_boxplot()+
    scale_fill_manual(values=c("pathotype+_case"="coral2", "pathotype+_control"="steelblue3", "pathotype-_case"="darkgoldenrod2", "pathotype-_control"="darkolivegreen4"), labels=c("Symptomatic DEC infections", "Asymptomatic DEC infections", "Uninfected cases", "Uninfected controls"))+
    #ggtitle("All samples")+
    ylab("Number of genes")+
    xlab(NULL)+
    stat_compare_means(method="wilcox.test", label.x=0.8, size=3.5)+
    guides(fill=guide_legend(nrow=2), byrow=TRUE)+
    theme(panel.grid.major=element_blank(), panel.grid.minor=element_blank(), legend.title=element_blank(), panel.border=element_rect(color="black", fill=NA, size=0.5), legend.position="bottom", axis.text.x=element_blank(), axis.ticks.x=element_blank(), legend.text=element_text(size=12), axis.text.y=element_text(size=10), axis.title.y=element_text(size=12))
VFDB_path_box

#plot of E.coli virulence genes by cc and pathotype status

EC_VFDB_rowsums$path <- factor(EC_VFDB_rowsums$path, levels=c("pathotype+", "pathotype-"))

EC_VFDB_path_box<-EC_VFDB_rowsums %>% 
  tibble::rownames_to_column("Sample_ID") %>%
  ggplot(aes(fill=cc_path, y=rowSums, x=path))+
    geom_boxplot()+
    scale_fill_manual(values=c("pathotype+_case"="coral2", "pathotype+_control"="steelblue3", "pathotype-_case"="darkgoldenrod2", "pathotype-_control"="darkolivegreen4"), labels=c("Symptomatic DEC infections", "Asymptomatic DEC infections", "Uninfected cases", "Uninfected controls"))+
    #ggtitle("All samples")+
    ylab("Number of genes")+
    xlab(NULL)+
    stat_compare_means(method="wilcox.test", label.x=0.8, size=3.5)+
    guides(fill=guide_legend(nrow=2), byrow=TRUE)+
    theme(panel.grid.major=element_blank(), panel.grid.minor=element_blank(), legend.title=element_blank(), panel.border=element_rect(color="black", fill=NA, size=0.5), legend.position="bottom", axis.text.x=element_blank(), axis.ticks.x=element_blank(), legend.text=element_text(size=12), axis.text.y=element_text(size=10), axis.title.y=element_text(size=12))
EC_VFDB_path_box

#plot of Klebsiella virulence genes by cc and pathotype status

Kleb_VFDB_rowsums$path <- factor(Kleb_VFDB_rowsums$path, levels=c("pathotype+", "pathotype-"))

Kleb_VFDB_path_box<-Kleb_VFDB_rowsums %>% 
  tibble::rownames_to_column("Sample_ID") %>%
  ggplot(aes(fill=cc_path, y=rowSums, x=path))+
    geom_boxplot()+
    scale_fill_manual(values=c("pathotype+_case"="coral2", "pathotype+_control"="steelblue3", "pathotype-_case"="darkgoldenrod2", "pathotype-_control"="darkolivegreen4"), labels=c("Symptomatic DEC infections", "Asymptomatic DEC infections", "Uninfected cases", "Uninfected controls"))+
    #ggtitle("All samples")+
    ylab("Number of genes")+
    xlab(NULL)+
    stat_compare_means(method="wilcox.test", label.x=0.8, size=3.5)+
    guides(fill=guide_legend(nrow=2), byrow=TRUE)+
    theme(panel.grid.major=element_blank(), panel.grid.minor=element_blank(), legend.title=element_blank(), panel.border=element_rect(color="black", fill=NA, size=0.5), legend.position="bottom", axis.text.x=element_blank(), axis.ticks.x=element_blank(), legend.text=element_text(size=12), axis.text.y=element_text(size=10), axis.title.y=element_text(size=12))
Kleb_VFDB_path_box

#boxplot multi with Ec result

#boxplot
VFDB_rowsums$path <- factor(VFDB_rowsums$path, levels=c("pathotype+", "pathotype-"))

VFDB_path_box2<-VFDB_rowsums %>% 
  tibble::rownames_to_column("Sample_ID") %>%
  ggplot(aes(fill=cc_path, y=rowSums, x=path))+
    geom_boxplot()+
    scale_fill_manual(values=c("pathotype+_case"="coral2", "pathotype+_control"="steelblue3", "pathotype-_case"="darkgoldenrod2", "pathotype-_control"="darkolivegreen4"), labels=c("Symptomatic DEC infections", "Asymptomatic DEC infections", "Uninfected cases", "Uninfected controls"))+
    ylab("Number of genes")+
    xlab(NULL)+
    stat_compare_means(method="wilcox.test", label.x=0.8, size=3.5)+
    ggtitle("Total virulence genes (any taxa)")+
    theme(panel.grid.major=element_blank(), panel.grid.minor=element_blank(), , panel.border=element_rect(color="black", fill=NA, size=0.5), axis.text.x=element_blank(), axis.ticks.x=element_blank(), axis.text.y=element_text(size=10), axis.title.y=element_text(size=12), legend.position="none")
VFDB_path_box2

#boxplot w only E. coli-annotated genes
EC_VFDB_rowsums$path <- factor(EC_VFDB_rowsums$path, levels=c("pathotype+", "pathotype-"))

EC_VFDB_path_box2<-EC_VFDB_rowsums %>% 
  tibble::rownames_to_column("Sample_ID") %>%
  ggplot(aes(fill=cc_path, y=rowSums, x=path))+
    geom_boxplot()+
    scale_fill_manual(values=c("pathotype+_case"="coral2", "pathotype+_control"="steelblue3", "pathotype-_case"="darkgoldenrod2", "pathotype-_control"="darkolivegreen4"), labels=c("Symptomatic DEC infections", "Asymptomatic DEC infections", "Uninfected cases", "Uninfected controls"))+
    ylab("Number of genes")+
    xlab(NULL)+
    stat_compare_means(method="wilcox.test", label.x=0.8, size=3.5)+
    guides(fill=guide_legend(nrow=2), byrow=TRUE)+
    ggtitle(expression(paste(italic("E. coli"),"-annotated virulence genes")))+
    theme(panel.grid.major=element_blank(), panel.grid.minor=element_blank(), legend.title=element_blank(), panel.border=element_rect(color="black", fill=NA, size=0.5), legend.position="bottom", axis.text.x=element_blank(), axis.ticks.x=element_blank(), legend.text=element_text(size=12), axis.text.y=element_text(size=10), axis.title.y=element_text(size=12))
EC_VFDB_path_box2

box2<-plot_grid(VFDB_path_box2, EC_VFDB_path_box2, ncol=1, rel_heights=c(1,1.3), labels=c("A", "B"))
box2

#heatmap

#read in streamlined VFDB descriptions
streamlined_KW<-read.csv("mean_KW_mat.csv")

streamlined_KW_mat<-streamlined_KW %>%
  subset(select = -c(1:7) ) %>%
  column_to_rownames("description")%>%
  as.matrix()

colnames(streamlined_KW_mat)<-c("Symptomatic DEC infections", "Asymptomatic DEC infections", "Uninfected cases", "Uninfected controls")

#KW
heatmap_annot<-read.csv("mean_heatmap_annot v2.csv")

org_annot<-read.csv("mean_KW_mat_descriptors_org.csv") %>%
  column_to_rownames(var="description")%>%
  as.matrix()

func_annot<-read.csv("mean_KW_mat_descriptors_func.csv") %>%
  column_to_rownames(var="description")%>%
  as.matrix()

operon_annot<-read.csv("mean_KW_mat_descriptors_operon.csv") %>%
  column_to_rownames(var="description")%>%
  as.matrix()

col = list(pathotype_cc=c("Symptomatic DEC infections"="coral2", "Asymptomatic DEC infections"="steelblue3", "Uninfected cases"="darkgoldenrod2", "Uninfected controls"="darkolivegreen4" ),
           path=c("Pathotype-negative"="gray98", "Pathotype-positive"="gray60"),
           cc=c("Case"="gray60", "Control"="gray98"))

cn=colnames(streamlined_KW_mat)

levels=c("Symptomatic DEC infections", "Asymptomatic DEC infections", "Uninfected cases", "Uninfected controls")

ha<-HeatmapAnnotation(pathotype_cc=heatmap_annot$pathotype_cc,
                      cc=heatmap_annot$cc,
                      path=heatmap_annot$pathotype,
                      col=col, 
                      annotation_label=c("Sample group", "DEC infection status", "Diarrhea status"), 
                      #text=anno_text(cn,just="center", location=0.5),
                      #gp=gpar(col="white", border="black"),
                      annotation_height=(max_text_width(cn)))

mycols_mean <- colorRamp2(breaks = c(0,(max(streamlined_KW_mat)/2),max(streamlined_KW_mat)), colors = c("gray98", "mediumpurple", "mediumpurple4"))

h_KW<-Heatmap(streamlined_KW_mat, 
              row_names_gp = gpar(fontsize = 8), 
              show_column_names=FALSE,  
              show_column_dend=FALSE, 
              col=mycols_mean, 
              show_row_dend = FALSE, 
              show_row_names=FALSE,
              column_order=c(1,2,3,4),
              bottom_annotation=ha, 
              heatmap_legend_param=list(title=c("Rel. abund"), legend_height=unit(3, "cm")))
#h_KW<-draw(h_KW, heatmap_legend_side="left", annotation_legend_side="left")

org<-Heatmap(org_annot, 
             width=unit(5,"mm"), 
             show_column_names=FALSE, 
             show_row_names=FALSE, 
             name="Organism",
             col=c("lightsteelblue2", "Gray60", "lightskyblue3", "lightskyblue4", "gray80"))

nb.cols<-9
func_color<-order_colors<-colorRampPalette(rev(brewer.pal(9, "Paired")))(nb.cols)
func<-Heatmap(func_annot, 
             width=unit(5,"mm"), 
             show_column_names=FALSE, 
             show_row_names=TRUE, 
             name="Function",
             col=c(func_color),
             row_names_gp=gpar(fontsize=9),
             row_names_max_width=max_text_width(rownames(streamlined_KW_mat)))


nb.cols<-23
operon_color<-order_colors<-colorRampPalette(brewer.pal(23, "Spectral"))(nb.cols)
operon<-Heatmap(operon_annot, 
           width=unit(5,"mm"), 
           show_column_names=FALSE, 
           show_row_names=TRUE, 
           name="Operon", 
           row_names_max_width=max_text_width(rownames(mean_KW_mat)),
           col=c(operon_color))
h_KW

h_KW+func