## Finding the geometric mean of column V3 (Mash distance) for each V1 entry
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the pairwise table using read.table()'s default settings; the
# columns come back auto-named V1..V5, and the later steps use V1 and V3.
dd <- read.table(file = "KP_Mash_Dist_Geom.out")
# Preview the first six rows.
head(dd, n = 6L)
## V1
## 1 NEPAL_COOL_C00013422_R00000049_contigs.fasta
## 2 NEPAL_COOL_C00013422_R00000049_contigs.fasta
## 3 NEPAL_COOL_C00013422_R00000049_contigs.fasta
## 4 NEPAL_COOL_C00013422_R00000049_contigs.fasta
## 5 NEPAL_COOL_C00013422_R00000049_contigs.fasta
## 6 NEPAL_COOL_C00013422_R00000049_contigs.fasta
## V2 V3 V4 V5
## 1 NEPAL_COOL_C00013424_R00000049_contigs.fasta 0.000119496 0 995/1000
## 2 NEPAL_COOL_C00013425_R00000049_contigs.fasta 0.000191626 0 992/1000
## 3 NEPAL_COOL_C00013426_R00000049_contigs.fasta 0.000167546 0 993/1000
## 4 NEPAL_COOL_C00013427_R00000049_contigs.fasta 0.000143503 0 994/1000
## 5 NEPAL_COOL_C00013428_R00000049_contigs.fasta 0.000264084 0 989/1000
## 6 NEPAL_COOL_C00013430_R00000049_contigs.fasta 0.000191626 0 992/1000
# Geometric mean of V3 within each V1 group, computed as exp(mean(log(x))).
# NOTE(review): log(0) is -Inf, so a single zero V3 in a group makes that
# group's result 0 -- confirm this is the intended handling.
summarydf <- dd %>%
  group_by(V1) %>%
  summarise(Geo.MashMean = exp(mean(log(V3))))
# Preview the first six groups.
head(summarydf, n = 6L)
## # A tibble: 6 x 2
## V1 Geo.MashMean
## <fct> <dbl>
## 1 NEPAL_COOL_C00013422_R00000049_contigs.fasta 0.00883
## 2 NEPAL_COOL_C00013424_R00000049_contigs.fasta 0.00870
## 3 NEPAL_COOL_C00013425_R00000049_contigs.fasta 0.00909
## 4 NEPAL_COOL_C00013426_R00000049_contigs.fasta 0.00897
## 5 NEPAL_COOL_C00013427_R00000049_contigs.fasta 0.00886
## 6 NEPAL_COOL_C00013428_R00000049_contigs.fasta 0.00917
# Save the per-group summary as text: the header line is kept and row
# names are dropped. quote, sep and col.names only spell out the defaults.
write.table(
  summarydf,
  file = "KP_Mash_Dist_Geom_v2.out",
  quote = TRUE,
  sep = " ",
  row.names = FALSE,
  col.names = TRUE
)