library(rio)
fdir1 <- "C:/Users/trant/Downloads/STUDY/COURSERA/R Intro/metamicrobiomeR-master/inst/extdata/QIIME_outputs/Bangladesh/"
natural2 <-import(paste(fdir1,"nature13421-s2.xlsx",sep=""),sheet=2,skip=1)
library(dplyr)
newnames <- c("cohort",
"family.id",
"child.id",
"fecal.sample.id",
"age.days.id",
"age.months.id",
"whz",
"haz",
"waz",
"diet",
"...11",
"...12",
"diarrhoea",
"antibiotics.7days",
"medications",
"number.V4-16SrRNA",
"16srrna",
"sample.barcode")
natural2 <- natural2 %>%
rename_at(vars(1:18), ~newnames)
fecal.child <- select(natural2, child.id, fecal.sample.id)
fecal.child <- as.data.frame(count(fecal.child, child.id)) %>%
rename_at(vars(2), ~"fecal.per.child")
library(plyr)
subnatural2 <- filter(select(natural2, "cohort", "child.id", "fecal.sample.id"), !is.na(fecal.sample.id)) %>%
group_by(cohort)
subnatural2 <- ddply(subnatural2,.(child.id),transform,count.fecal=length(cohort))
summarise(subnatural2,
mean = mean(count.fecal, na.rm = TRUE),
sd = sd(count.fecal, na.rm = TRUE),
median = median(count.fecal, na.rm = TRUE),
IQR = IQR(count.fecal, na.rm = TRUE))
## mean sd median IQR
## 1 20.90562 3.898605 22 5
##Plot number of fecal samples by cohort of singletons vs twins/triplets
library(ggplot2)
ggplot(subnatural2,aes(x=cohort, y=count.fecal, color=cohort)) +
geom_boxplot()