## Finding the geometric mean of the Mash distances

library("dplyr")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the distance table with read.table's default (whitespace) field
# splitting; the columns show up under the default names V1, V2, ...
dd <- read.table(file = "KP_Mash_Dist_Geom.out")
# Preview the first six rows to sanity-check the import.
head(dd, n = 6L)
##                                             V1
## 1 NEPAL_COOL_C00013422_R00000049_contigs.fasta
## 2 NEPAL_COOL_C00013422_R00000049_contigs.fasta
## 3 NEPAL_COOL_C00013422_R00000049_contigs.fasta
## 4 NEPAL_COOL_C00013422_R00000049_contigs.fasta
## 5 NEPAL_COOL_C00013422_R00000049_contigs.fasta
## 6 NEPAL_COOL_C00013422_R00000049_contigs.fasta
##                                             V2          V3 V4       V5
## 1 NEPAL_COOL_C00013424_R00000049_contigs.fasta 0.000119496  0 995/1000
## 2 NEPAL_COOL_C00013425_R00000049_contigs.fasta 0.000191626  0 992/1000
## 3 NEPAL_COOL_C00013426_R00000049_contigs.fasta 0.000167546  0 993/1000
## 4 NEPAL_COOL_C00013427_R00000049_contigs.fasta 0.000143503  0 994/1000
## 5 NEPAL_COOL_C00013428_R00000049_contigs.fasta 0.000264084  0 989/1000
## 6 NEPAL_COOL_C00013430_R00000049_contigs.fasta 0.000191626  0 992/1000
# One row per distinct V1 value, holding the geometric mean of its V3 values.
# The geometric mean is computed as exp(mean(log(x))).
# NOTE(review): a V3 of exactly 0 in a group gives log(0) = -Inf and therefore
# a geometric mean of 0 for that group -- confirm the input has no zeros.
summarydf <- dd %>%
  group_by(V1) %>%
  summarise(
    Geo.MashMean = exp(mean(log(V3)))
  )
# Preview the first six summary rows.
head(summarydf, n = 6L)
## # A tibble: 6 x 2
##   V1                                           Geo.MashMean
##   <fct>                                               <dbl>
## 1 NEPAL_COOL_C00013422_R00000049_contigs.fasta      0.00883
## 2 NEPAL_COOL_C00013424_R00000049_contigs.fasta      0.00870
## 3 NEPAL_COOL_C00013425_R00000049_contigs.fasta      0.00909
## 4 NEPAL_COOL_C00013426_R00000049_contigs.fasta      0.00897
## 5 NEPAL_COOL_C00013427_R00000049_contigs.fasta      0.00886
## 6 NEPAL_COOL_C00013428_R00000049_contigs.fasta      0.00917
# Save the summary table as text, leaving out the row-name column.
write.table(
  x = summarydf,
  file = "KP_Mash_Dist_Geom_v2.out",
  row.names = FALSE
)