The raw microbiome sequence data is counts data, i.e. they are the number of sequencing reads that, after quality control, aligned to each Faecalibacterium prausnitzii Organizational Taxonomic Unit (OTU). This will be referred to as the “raw data” from now on, and is identified by the object name “otus_raw”.
We created a new variable, named otus_root by calculating the square root of otus_raw.
We also created the variable otus_binary which contains indicator variables for the presence of each OTU, thus it only has values of 0 and 1.
Finally, we created a new variable that equals the sum of the 6 OTUs in the otus_raw dataset. We also created an square-root and an indicator variable for this variable. This 3 variables are stored in the table otus_total.
dat <- read.table(dat_file, stringsAsFactors = F, header = T, sep = "\t")
rownames(dat) <- as.character(dat$Sample_Id)
model_data <- dat %>% select(Sample_Id, delta_laz_median, sex, age_days.x, time_span, breastfed, country)
model_data$delta_laz_median <- as.numeric(model_data$delta_laz_median)
model_data$age_days.x <- as.numeric(model_data$age_days.x)
model_data$time_span <- as.numeric(model_data$time_span)
model_data <- model_data %>% dplyr::rename(linear_growth=delta_laz_median)
model_data <- model_data %>% dplyr::rename(age=age_days.x)
model_data <- model_data %>% dplyr::rename(time=time_span)
otus_raw <- dat %>% select(Sample_Id, starts_with("OTU")[1:6])
otus_raw$OTU_total <- rowSums(otus_raw[,-1])
for(i in 2:ncol(otus_raw)){
otus_raw[,i] <- as.integer(otus_raw[,i])
}
otus_root <- otus_raw
otus_root[,2:8] <- sqrt(otus_raw[,2:8])
otus_binary <- otus_raw
for(i in 2:ncol(otus_binary)){
otus_binary[,i] <- as.integer(otus_binary[,i]>=1)
}
otus_total <- data.frame(Sample_Id=otus_raw$Sample_Id,
stringsAsFactors = F)
otus_total$raw <- otus_raw$OTU_total
otus_total$root <- otus_root$OTU_total
otus_total$binary <- otus_binary$OTU_total
rownames(otus_total) <- rownames(otus_raw)
otu_objects <- c("otus_raw", "otus_binary", "otus_root")
parameterizations <- c("Raw", "Binary", "Square Root")
tmp <- model_data %>% select(!Sample_Id)
metadata_summary <- autosummary_list(tmp)
if(!is.na(metadata_summary$quant)){
kbl(metadata_summary$quant, caption="Summary of the quantitative demographic and clinical variables") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
}
| Variable | Mean | Std. Dev. | Min | Q1 | Q2 | Q3 | Max |
|---|---|---|---|---|---|---|---|
| linear_growth | -0.2 | 0.6 | -4 | -0.5 | -0.2 | 0.1 | 6.7 |
| age | 376.7 | 172 | 43 | 235 | 356 | 515 | 728 |
| time | 62.1 | 11.7 | 48 | 52 | 59 | 70 | 102 |
vars <- unique(metadata_summary$qual$Variable)
tmp <- metadata_summary$qual %>% filter(Variable==vars[1]) %>% select(!Variable)
kbl(tmp, caption=vars[1]) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
| Group | Frequency (%) |
|---|---|
| Female | 540(44) |
| Male | 697(56) |
| NA | 0(0) |
tmp <- metadata_summary$qual %>% filter(Variable==vars[2]) %>% select(!Variable)
kbl(tmp, caption=vars[2]) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
| Group | Frequency (%) |
|---|---|
| exclusively | 69(6) |
| no | 160(13) |
| partially | 1008(81) |
| NA | 0(0) |
tmp <- metadata_summary$qual %>% filter(Variable==vars[3]) %>% select(!Variable)
kbl(tmp, caption=vars[3]) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
| Group | Frequency (%) |
|---|---|
| Bangladesh | 312(25) |
| Kenya | 520(42) |
| Mali | 85(7) |
| The Gambia | 320(26) |
| NA | 0(0) |
cols <- c("dodgerblue4", "gray50")
cols <- adjustcolor(cols, alpha.f = 0.5)
hist(model_data$linear_growth, breaks = 30, main = "Linear growth",
xlab = expr(paste(Delta, " height Z-score")), cex.lab = 1.2)
qqnorm(model_data$linear_growth, main = "Linear growth", cex.lab = 1.2,
col=cols[2], pch = 19)
extreme_values <- sort(model_data$linear_growth)
extreme_values <- extreme_values[c(1:2, (nrow(model_data)-3):nrow(model_data))]
idx_outliers <- model_data$linear_growth %in% extreme_values
outliers <- model_data[idx_outliers,]
par(xpd=TRUE)
qqnorm(model_data$linear_growth, main = "Linear growth", cex.lab = 1.2,
col=cols[abs(idx_outliers-2)], pch = 19)
grid()
legend("topleft", legend= c("Non-outliers", "Potential outliers"),
pch =19, bty="L", box.col=cols[2], border=cols[2],
col = cols[2:1])
par(xpd=FALSE)
kbl(outliers, caption="List of potential outliers whose removal will be assessed") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
| Sample_Id | linear_growth | sex | age | time | breastfed | country | |
|---|---|---|---|---|---|---|---|
| 102474 | 102474 | -3.96 | Female | 728 | 74 | partially | The Gambia |
| 102333 | 102333 | 4.21 | Male | 512 | 67 | no | The Gambia |
| 100432 | 100432 | 6.71 | Male | 715 | 77 | no | The Gambia |
| 200253 | 200253 | 4.28 | Male | 471 | 59 | partially | Mali |
| 400993 | 400993 | 3.91 | Male | 291 | 90 | partially | Kenya |
| 402065 | 402065 | -3.69 | Male | 554 | 50 | no | Kenya |
overview1 <- data.frame()
for(j in c(1,3)){
obj <- get(otu_objects[j]) %>% select(!Sample_Id)
coln <- colnames(obj)
for(i in (1:ncol(obj))){
tmp <- round(summary(obj[,i]),2)
std <- round(sd(obj[,i]),2)
tmp <- c(parameterizations[j], coln[i], tmp, std)
overview1 <- rbind(overview1, tmp)
}
}
colnames(overview1) <- c("Parameterization", "OTU", "Min", "Q1", "Q2", "Mean", "Q3", "Max", "SD")
overview1 <- overview1 %>% select(Parameterization, OTU, Mean, SD, Q1, Q2, Q3, Min, Max)
nbinary <- unname(colSums(otus_binary[,-1]))
nbinary <- paste0( nbinary, "(", round(nbinary/nrow(otus_binary)*100), ")")
overview2 <- data.frame(Parameterization="Binary",
OTU=colnames(otus_binary[,-1]),
N=nbinary,
n=colSums(otus_binary[,-1]))
rownames(overview2) <- NULL
# otus_raw <- otus_raw %>% select(!OTU_total)
# otus_binary <- otus_binary %>% select(!OTU_total)
# otus_root <- otus_root %>% select(!OTU_total)
kbl(overview1, caption="Summary of the quantitative microbiota abundance variables used as predictors") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
| Parameterization | OTU | Mean | SD | Q1 | Q2 | Q3 | Min | Max |
|---|---|---|---|---|---|---|---|---|
| Raw | OTU_39042 | 1.68 | 10.9 | 0 | 0 | 0 | 0 | 190 |
| Raw | OTU_39040 | 1.12 | 6.56 | 0 | 0 | 0 | 0 | 136 |
| Raw | OTU_67958 | 0.94 | 6.52 | 0 | 0 | 0 | 0 | 156 |
| Raw | OTU_54565 | 0.72 | 2.88 | 0 | 0 | 0 | 0 | 46 |
| Raw | OTU_87766 | 0.38 | 1.43 | 0 | 0 | 0 | 0 | 18 |
| Raw | OTU_65293 | 0.31 | 1.83 | 0 | 0 | 0 | 0 | 35 |
| Raw | OTU_total | 5.15 | 17.48 | 0 | 0 | 3 | 0 | 207 |
| Square Root | OTU_39042 | 0.34 | 1.25 | 0 | 0 | 0 | 0 | 13.78 |
| Square Root | OTU_39040 | 0.32 | 1.01 | 0 | 0 | 0 | 0 | 11.66 |
| Square Root | OTU_67958 | 0.28 | 0.93 | 0 | 0 | 0 | 0 | 12.49 |
| Square Root | OTU_54565 | 0.32 | 0.79 | 0 | 0 | 0 | 0 | 6.78 |
| Square Root | OTU_87766 | 0.21 | 0.58 | 0 | 0 | 0 | 0 | 4.24 |
| Square Root | OTU_65293 | 0.14 | 0.54 | 0 | 0 | 0 | 0 | 5.92 |
| Square Root | OTU_total | 1.13 | 1.97 | 0 | 0 | 1.73 | 0 | 14.39 |
kbl(overview2, caption="Summary of the indicator variables for presence of F. prausnitzii") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
| Parameterization | OTU | N | n |
|---|---|---|---|
| Binary | OTU_39042 | 153(12) | 153 |
| Binary | OTU_39040 | 183(15) | 183 |
| Binary | OTU_67958 | 170(14) | 170 |
| Binary | OTU_54565 | 232(19) | 232 |
| Binary | OTU_87766 | 178(14) | 178 |
| Binary | OTU_65293 | 103(8) | 103 |
| Binary | OTU_total | 505(41) | 505 |
model_data$breastfed <- factor(model_data$breastfed) %>% relevel(ref="no")
model_data$country <- factor(model_data$country) %>% relevel(ref="Bangladesh")
model_data$sex <- factor(model_data$sex) %>% relevel(ref="Female")
bivariate_metadata <- data.frame()
for(i in 3:ncol(model_data)){
mod_tmp <- lm(model_data$linear_growth ~ model_data[,i]) %>% confint2(digits = 4)
bivariate_metadata <- rbind(bivariate_metadata, mod_tmp)
}
bivariate_metadata$variable <- rownames(bivariate_metadata)
rownames(bivariate_metadata) <- NULL
bivariate_metadata <- bivariate_metadata[,c(4,1:3)]
bivariate_metadata$variable <- c("intercept_sex", "Male", "intercept_age", "age",
"intercept_timespan", "timespan", "intercept_breastfed",
"Exclusively", "Partially", "intercept_country",
"Kenya", "Mali", "The_Gambia")
bivariate_metadata <- bivariate_metadata[-grep("intercept_", bivariate_metadata$variable),]
otus_names <- colnames(otus_raw[,-1])
bivariate_obj <- c("bivariate_raw", "bivariate_root", "bivariate_binary")
otus_obj <- c("otus_raw", "otus_root", "otus_binary")
options(par(mfrow=c(1,2)))
for(j in 1:3){
otus_tmp <- get(otus_obj[j])
bivariate_tmp <- data.frame()
for(i in 2:ncol(otus_tmp)){
mod_tmp <- lm(model_data$linear_growth ~ otus_tmp[,i])
# cat(paste0("_**", parameterizations[j], ", ", colnames(otus_tmp)[i], ":**_ \n "))
cat(" \n")
cat(paste0("### ", parameterizations[j], ", ", colnames(otus_tmp)[i], " \n"))
cat(" \n")
# cat(paste0("### ", parameterizations[j], ", ", colnames(otus_tmp)[i], " \n"))
par(mfrow=c(3,2))
plot(mod_tmp, which = 1:6, pch = 19, main = parameterizations[j],
col=cols[(idx_outliers-1)^2+1], lwd = 3, cex.lab = 2, cex = 1.2, cex.id = 1.2)
cat(" \n \n ")
mod_tmp <- mod_tmp %>% confint2(digits = 4)
bivariate_tmp <- rbind(bivariate_tmp, mod_tmp)
}
bivariate_tmp <- bivariate_tmp[-c(1,3,5,7,9,11,13),]
bivariate_tmp$variable<- otus_names
bivariate_tmp <- bivariate_tmp[,c(4,1:3)]
rownames(bivariate_tmp) <- NULL
assign(bivariate_obj[j], bivariate_tmp)
}
par(mfrow=c(1,1))
kbl(bivariate_metadata, caption="Crude associations of linear growth with demographic and clinical variables") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
| variable | Estimate | P-value | 95%CI | |
|---|---|---|---|---|
| 2 | Male | 0.0175 | 0.6371 | (-0.0552, 0.0901) |
| 4 | age | 4e-04 | 1e-04 | (2e-04, 6e-04) |
| 6 | timespan | -0.001 | 0.5061 | (-0.0041, 0.002) |
| 8 | Exclusively | -0.2301 | 0.0133 | (-0.4122, -0.048) |
| 9 | Partially | -0.1088 | 0.0474 | (-0.2164, -0.0013) |
| 11 | Kenya | 0.0276 | 0.5509 | (-0.0631, 0.1183) |
| 12 | Mali | -0.0457 | 0.5628 | (-0.2007, 0.1093) |
| 13 | The_Gambia | -0.0408 | 0.4278 | (-0.1415, 0.06) |
kbl(bivariate_raw, caption="Crude associations of linear growth with RAW F. prausnitzii counts") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
| variable | Estimate | P-value | 95%CI |
|---|---|---|---|
| OTU_39042 | 0.0013 | 0.4256 | (-0.002, 0.0046) |
| OTU_39040 | 0.0012 | 0.6664 | (-0.0043, 0.0067) |
| OTU_67958 | 0.0144 | 0 | (0.0089, 0.0199) |
| OTU_54565 | 0.0148 | 0.0204 | (0.0023, 0.0272) |
| OTU_87766 | -0.0077 | 0.5486 | (-0.0328, 0.0174) |
| OTU_65293 | -0.0021 | 0.8334 | (-0.0218, 0.0176) |
| OTU_total | 0.003 | 0.004 | (0.001, 0.0051) |
kbl(bivariate_root, caption="Crude associations of linear growth with square root(F. prausnitzii counts)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
| variable | Estimate | P-value | 95%CI |
|---|---|---|---|
| OTU_39042 | 0.0202 | 0.1685 | (-0.0086, 0.049) |
| OTU_39040 | 0.0267 | 0.1433 | (-0.0091, 0.0625) |
| OTU_67958 | 0.0735 | 2e-04 | (0.0349, 0.1121) |
| OTU_54565 | 0.0365 | 0.1182 | (-0.0093, 0.0824) |
| OTU_87766 | -0.0165 | 0.6011 | (-0.0784, 0.0454) |
| OTU_65293 | -0.0014 | 0.9666 | (-0.0684, 0.0655) |
| OTU_total | 0.0265 | 0.0045 | (0.0082, 0.0447) |
kbl(bivariate_binary, caption="Crude associations of linear growth with presence of F. prausnitzii") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
| variable | Estimate | P-value | 95%CI |
|---|---|---|---|
| OTU_39042 | 0.1284 | 0.0213 | (0.0192, 0.2375) |
| OTU_39040 | 0.0734 | 0.1556 | (-0.028, 0.1748) |
| OTU_67958 | 0.1041 | 0.0507 | (-3e-04, 0.2086) |
| OTU_54565 | 0.0166 | 0.7237 | (-0.0757, 0.1089) |
| OTU_87766 | -0.0056 | 0.9148 | (-0.1082, 0.097) |
| OTU_65293 | 0.0014 | 0.9831 | (-0.129, 0.1318) |
| OTU_total | 0.0863 | 0.0208 | (0.0131, 0.1594) |
varnames <- c("Sex", "Country", "Breastfed")
col_pos <- c(3, 6, 7)
boxplot(model_data$linear_growth~model_data$sex,
main = varnames[i], xlab = "", ylab = "Linear growth",
col=cols[2], pch = 19)
plotdata <- model_data
plotdata$breastfed <- str_to_sentence(plotdata$breastfed)
plotdata$sex <- str_to_sentence(plotdata$sex)
plotdata$country <- str_to_sentence(plotdata$country)
ggplot(data = plotdata, aes(x = sex, y = linear_growth)) +
geom_boxplot(
position = position_dodge(width=0.5),
aes(fill = sex),
outlier.colour = cols[2],
outlier.size = 0.8) +
scale_fill_manual(values = cols) +
facet_wrap(~ breastfed + country, ncol = 4, nrow=3) +
#Set the size of the plotting window
theme_bw(base_size=12) +
#Modify various aspects of the plot text and legend
theme(
legend.position = 'none',
legend.background = element_rect(),
axis.text.x = element_text(angle = 45, size = 12, hjust = 1.10),
axis.text.y = element_text(angle = 0, size = 12, vjust = 0.5),
axis.title = element_text(size = 12),
axis.title.x = element_text(size = 14, face="bold"),
axis.title.y = element_text(size = 14, face="bold"),
#facet wrap
strip.text.x = element_text(size = 12),
strip.text.y = element_text(size = 12, angle=0),
strip.background = element_rect(fill = 'white'),
strip.text = element_text(size = 12, colour = 'black'))+
#Change the size of the icons/symbols in the legend
guides(colour = guide_legend(override.aes = list(size = 2.5))) +
#Set x- and y-axes labels
ylab('Linear growth') +
xlab("Sex") +
labs(title = 'Linear growth stratified by sex, country and history of being breastfed')
### Second plot:
plot2 <- ggplot(data = plotdata, aes(x = sex, y = linear_growth)) +
geom_boxplot(
position = position_dodge(width=0.5),
aes(fill = sex),
outlier.colour = cols[2],
outlier.size = 1) +
scale_fill_manual(values = c(cols[2], cols[2])) +
facet_wrap(~ breastfed, ncol=3) +
#Set the size of the plotting window
theme_bw(base_size=12) +
#Modify various aspects of the plot text and legend
theme(
legend.position = 'none',
legend.background = element_rect(),
axis.text.x = element_text(angle = 45, size = 12, hjust = 1.10),
axis.text.y = element_text(angle = 0, size = 12, vjust = 0.5),
axis.title = element_text(size = 12),
axis.title.x = element_text(size = 14, face = "bold"),
axis.title.y = element_text(size = 14, face = "bold"),
#facet wrap
strip.text.x = element_text(size = 12),
strip.text.y = element_text(size = 12, angle=0),
strip.background = element_rect(fill = 'white'),
strip.text = element_text(size = 12, colour = 'black'))+
#Change the size of the icons/symbols in the legend
guides(colour = guide_legend(override.aes = list(size = 2.5))) +
#Set x- and y-axes labels
ylab('Linear growth') +
xlab('Sex') +
labs(title = 'Linear growth stratified by sex and history of being breastfed')
### Third plot:
plot3 <- ggplot(data = plotdata, aes(x = sex, y = linear_growth)) +
geom_boxplot(
position = position_dodge(width=0.5),
aes(fill = sex),
outlier.colour = cols[2],
outlier.size = 1) +
scale_fill_manual(values = c(cols[2], cols[2])) +
facet_wrap(~ country, ncol=4) +
#Set the size of the plotting window
theme_bw(base_size=12) +
#Modify various aspects of the plot text and legend
theme(
legend.position = 'none',
legend.background = element_rect(),
axis.text.x = element_text(angle = 45, size = 12, hjust = 1.10),
axis.text.y = element_text(angle = 0, size = 12, vjust = 0.5),
axis.title = element_text(size = 12),
axis.title.x = element_text(size = 14, face = "bold"),
axis.title.y = element_text(size = 14, face = "bold"),
#facet wrap
strip.text.x = element_text(size = 12),
strip.text.y = element_text(size = 12, angle=0),
strip.background = element_rect(fill = 'white'),
strip.text = element_text(size = 12, colour = 'black'))+
#Change the size of the icons/symbols in the legend
guides(colour = guide_legend(override.aes = list(size = 2.5))) +
#Set x- and y-axes labels
ylab('Linear growth') +
xlab('Sex') +
labs(title = 'Linear growth stratified by sex and country')
grid.arrange(plot2, plot3, nrow=2)
options(par(mfrow=c(3,1)))
cat(" \n")
cat("**Bivariate association: Linear growth vs OTU_total** \n ")
## **Bivariate association: Linear growth vs OTU_total**
##
plot(model_data$linear_growth ~ otus_raw$OTU_total, pch=19, col = cols[(idx_outliers-1)^2+1],
main = "Linear growth vs Raw OTU_total", xlab = "Raw OTU_total", ylab= "Linear growth",
cex.lab=1.6, cex=1.6, cex.main = 1.6); grid()
plot(model_data$linear_growth ~ otus_root$OTU_total, pch=19, col = cols[(idx_outliers-1)^2+1],
main = "Linear growth Square root(OTU_total)", xlab = "Square root(OTU_total)", ylab= "Linear growth",
cex.lab=1.6, cex=1.6, cex.main = 1.6); grid()
plot(model_data$linear_growth ~ otus_binary$OTU_total, pch=19, col = cols[(idx_outliers-1)^2+1],
main = "Linear growth vs Binary OTU_total", xlab = "Binary OTU_total", ylab= "Linear growth",
cex.lab=1.6, cex=1.6, cex.main = 1.6); grid()
cat(" \n")
cat("**Bivariate association: Linear growth vs OTU_67958** \n ")
## **Bivariate association: Linear growth vs OTU_67958**
##
plot(model_data$linear_growth ~ otus_raw$OTU_67958, pch=19, col = cols[(idx_outliers-1)^2+1],
main = "Linear growth vs Raw OTU_67958", xlab = "Raw OTU_67958", ylab= "Linear growth",
cex.lab=1.6, cex=1.6, cex.main = 1.6); grid()
plot(model_data$linear_growth ~ otus_root$OTU_67958, pch=19, col = cols[(idx_outliers-1)^2+1],
main = "Linear growth Square root(OTU_67958)", xlab = "Square root(OTU_67958)", ylab= "Linear growth",
cex.lab=1.6, cex=1.6, cex.main = 1.6); grid()
plot(model_data$linear_growth ~ otus_binary$OTU_67958, pch=19, col = cols[(idx_outliers-1)^2+1],
main = "Linear growth vs Binary OTU_67958", xlab = "Binary OTU_67958", ylab= "Linear growth",
cex.lab=1.6, cex=1.6, cex.main = 1.6); grid()
cat(" \n")
options(par(mfrow=c(1,1)))
otus_root3 <- otus_3root <- otus_raw^(1/3)
otus_root4 <- otus_4root <- otus_raw^(1/4)
multimod0 <- lm(model_data$linear_growth ~ model_data$sex + model_data$age + model_data$time +
model_data$breastfed + model_data$country + otus_binary$OTU_total)
multimod1 <- lm(model_data$linear_growth ~ model_data$sex + model_data$age + model_data$time +
model_data$breastfed + model_data$country + otus_root$OTU_total + otus_binary$OTU_total)
multimod2 <- lm(model_data$linear_growth ~ model_data$sex + model_data$age + model_data$time +
model_data$breastfed + model_data$country + otus_root$OTU_total + otus_binary$OTU_total +
otus_raw$OTU_total)
multimod3 <- lm(model_data$linear_growth ~ model_data$sex + model_data$age + model_data$time +
model_data$breastfed + model_data$country + otus_root$OTU_67958 + otus_binary$OTU_67958)
multimod4 <- lm(model_data$linear_growth ~ model_data$sex + model_data$age + model_data$time +
model_data$breastfed + model_data$country + otus_root$OTU_67958 + otus_binary$OTU_67958 +
otus_raw$OTU_67958)
multimod5 <- lm(model_data$linear_growth ~ model_data$sex + model_data$age + model_data$time +
model_data$breastfed + model_data$country + otus_root$OTU_67958 + otus_binary$OTU_67958 +
otus_raw$OTU_67958 + otus_root3$OTU_67958)
multimod6 <- lm(model_data$linear_growth ~ model_data$age + otus_root$OTU_67958 +
otus_binary$OTU_67958 +
otus_raw$OTU_67958 + otus_root3$OTU_67958 ) ## Best model
for(i in 0:6){
tmpmod <- get(paste0("multimod", i))
tmp_output <- format_model(tmpmod)
tmpcaption <- paste0("Multivariable model ", i, ". R-squared: ", round(summary(tmpmod)$r.squared, 3))
cat(" \n")
print(kbl(format_model(tmpmod), caption=tmpcaption) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")))
cat(" \n")
}
| Estimate | Std. Error | t-statistic | P-value | variable |
|---|---|---|---|---|
| -0.3224074 | 0.1756001 | -1.8360323 | 0.0665947 | Intercept |
| 0.0179152 | 0.0370226 | 0.4838985 | 0.6285443 | sexMale |
| 0.0003944 | 0.0001339 | 2.9447571 | 0.0032930 | age |
| 0.0004505 | 0.0021974 | 0.2050025 | 0.8376042 | time |
| -0.0693451 | 0.1033678 | -0.6708575 | 0.5024376 | breastfedexclusively |
| -0.0388964 | 0.0605777 | -0.6420918 | 0.5209335 | breastfedpartially |
| 0.0357739 | 0.0611468 | 0.5850498 | 0.5586218 | countryKenya |
| -0.0291160 | 0.0821072 | -0.3546094 | 0.7229431 | countryMali |
| -0.0722800 | 0.0531170 | -1.3607713 | 0.1738359 | countryThe Gambia |
| 0.0153269 | 0.0434695 | 0.3525896 | 0.7244567 | otus_binary: total |
| Estimate | Std. Error | t-statistic | P-value | variable |
|---|---|---|---|---|
| -0.3177217 | 0.1756040 | -1.8093085 | 0.0706479 | Intercept |
| 0.0178470 | 0.0370147 | 0.4821582 | 0.6297796 | sexMale |
| 0.0003810 | 0.0001344 | 2.8354141 | 0.0046518 | age |
| 0.0004161 | 0.0021971 | 0.1893983 | 0.8498120 | time |
| -0.0650103 | 0.1034057 | -0.6286916 | 0.5296681 | breastfedexclusively |
| -0.0346777 | 0.0606614 | -0.5716597 | 0.5676574 | breastfedpartially |
| 0.0294528 | 0.0613483 | 0.4800912 | 0.6312482 | countryKenya |
| -0.0356595 | 0.0822611 | -0.4334919 | 0.6647337 | countryMali |
| -0.0705533 | 0.0531242 | -1.3280832 | 0.1843978 | countryThe Gambia |
| 0.0161363 | 0.0130865 | 1.2330523 | 0.2177926 | otus_root: total |
| -0.0256736 | 0.0547215 | -0.4691687 | 0.6390325 | otus_binary: total |
| Estimate | Std. Error | t-statistic | P-value | variable |
|---|---|---|---|---|
| -0.3230552 | 0.1754316 | -1.8414882 | 0.0657917 | Intercept |
| 0.0177924 | 0.0369738 | 0.4812159 | 0.6304490 | sexMale |
| 0.0003914 | 0.0001343 | 2.9136296 | 0.0036374 | age |
| 0.0004546 | 0.0021948 | 0.2071188 | 0.8359515 | time |
| -0.0690215 | 0.1033123 | -0.6680857 | 0.5042047 | breastfedexclusively |
| -0.0380190 | 0.0606191 | -0.6271776 | 0.5306597 | breastfedpartially |
| 0.0354774 | 0.0613602 | 0.5781836 | 0.5632464 | countryKenya |
| -0.0340753 | 0.0821743 | -0.4146708 | 0.6784555 | countryMali |
| -0.0680025 | 0.0530819 | -1.2810852 | 0.2004062 | countryThe Gambia |
| -0.0510185 | 0.0372090 | -1.3711342 | 0.1705842 | otus_root: total |
| 0.0794356 | 0.0772072 | 1.0288630 | 0.3037472 | otus_binary: total |
| 0.0061885 | 0.0032104 | 1.9276759 | 0.0541264 | otus_raw: total |
| Estimate | Std. Error | t-statistic | P-value | variable |
|---|---|---|---|---|
| -0.3145681 | 0.1748041 | -1.7995470 | 0.0721780 | Intercept |
| 0.0140232 | 0.0368109 | 0.3809518 | 0.7033050 | sexMale |
| 0.0003880 | 0.0001245 | 3.1158669 | 0.0018766 | age |
| 0.0004078 | 0.0021867 | 0.1864969 | 0.8520859 | time |
| -0.0647361 | 0.1029237 | -0.6289719 | 0.5294846 | breastfedexclusively |
| -0.0385193 | 0.0603596 | -0.6381639 | 0.5234862 | breastfedpartially |
| 0.0340291 | 0.0602397 | 0.5648950 | 0.5722485 | countryKenya |
| -0.0587492 | 0.0819960 | -0.7164888 | 0.4738260 | countryMali |
| -0.0661855 | 0.0528965 | -1.2512255 | 0.2110909 | countryThe Gambia |
| 0.1042277 | 0.0302550 | 3.4449773 | 0.0005904 | otus_root: 67958 |
| -0.1668640 | 0.0833439 | -2.0021145 | 0.0454924 | otus_binary: 67958 |
| Estimate | Std. Error | t-statistic | P-value | variable |
|---|---|---|---|---|
| -0.3122884 | 0.1735037 | -1.7998949 | 0.0721232 | Intercept |
| 0.0177847 | 0.0365468 | 0.4866278 | 0.6266092 | sexMale |
| 0.0003889 | 0.0001236 | 3.1467588 | 0.0016907 | age |
| 0.0004563 | 0.0021704 | 0.2102508 | 0.8335069 | time |
| -0.0631722 | 0.1021582 | -0.6183767 | 0.5364420 | breastfedexclusively |
| -0.0456468 | 0.0599321 | -0.7616420 | 0.4464203 | breastfedpartially |
| 0.0375940 | 0.0597968 | 0.6286964 | 0.5296650 | countryKenya |
| -0.0864847 | 0.0816282 | -1.0594952 | 0.2895832 | countryMali |
| -0.0724589 | 0.0525221 | -1.3795891 | 0.1679650 | countryThe Gambia |
| -0.2196245 | 0.0793233 | -2.7687270 | 0.0057124 | otus_root: 67958 |
| 0.2551566 | 0.1264786 | 2.0173899 | 0.0438726 | otus_binary: 67958 |
| 0.0349795 | 0.0079301 | 4.4109942 | 0.0000112 | otus_raw: 67958 |
| Estimate | Std. Error | t-statistic | P-value | variable |
|---|---|---|---|---|
| -0.3137674 | 0.1726827 | -1.8170167 | 0.0694590 | Intercept |
| 0.0093235 | 0.0364513 | 0.2557801 | 0.7981636 | sexMale |
| 0.0003706 | 0.0001231 | 3.0101779 | 0.0026644 | age |
| 0.0008669 | 0.0021632 | 0.4007272 | 0.6886909 | time |
| -0.0833005 | 0.1018315 | -0.8180228 | 0.4135034 | breastfedexclusively |
| -0.0600623 | 0.0597856 | -1.0046279 | 0.3152746 | breastfedpartially |
| 0.0408560 | 0.0595207 | 0.6864161 | 0.4925807 | countryKenya |
| -0.0882210 | 0.0812432 | -1.0858885 | 0.2777421 | countryMali |
| -0.0754095 | 0.0522800 | -1.4424174 | 0.1494405 | countryThe Gambia |
| -3.7371378 | 0.9908653 | -3.7715902 | 0.0001700 | otus_root: 67958 |
| -2.4478734 | 0.7693765 | -3.1816328 | 0.0015013 | otus_binary: 67958 |
| 0.1323734 | 0.0284642 | 4.6505185 | 0.0000037 | otus_raw: 67958 |
| 6.0056006 | 1.6863683 | 3.5612628 | 0.0003832 | otus_root3: 67958 |
| Estimate | Std. Error | t-statistic | P-value | variable |
|---|---|---|---|---|
| -0.3213757 | 0.0433946 | -7.405896 | 0.0000000 | Intercept |
| 0.0003798 | 0.0001082 | 3.510398 | 0.0004636 | age |
| -3.6363710 | 0.9854776 | -3.689958 | 0.0002341 | otus_root: 67958 |
| -2.3928970 | 0.7649901 | -3.128011 | 0.0018012 | otus_binary: 67958 |
| 0.1282339 | 0.0283022 | 4.530881 | 0.0000064 | otus_raw: 67958 |
| 5.8602828 | 1.6771533 | 3.494184 | 0.0004924 | otus_root3: 67958 |
mods1 <- c(3,4,3, 5)
mods2 <- c(4, 5, 5, 6)
for(i in 1:4){
tmpmod1 <- get(paste0("multimod", mods1[i]))
tmpmod2 <- get(paste0("multimod", mods2[i]))
tmppval <- round(anova(tmpmod1, tmpmod2, test= "F")$P[2],4)
tmppval <- ifelse(tmppval<1e-4, "p<1e-4", tmppval)
tmpstr <- paste0("* Partial F test: Model ", mods1[i],
" vs Model ", mods2[i], ": ",
tmppval, " \n \n")
cat(tmpstr)
}
Partial F test: Model 3 vs Model 4: p<1e-4
Partial F test: Model 4 vs Model 5: 4e-04
Partial F test: Model 3 vs Model 5: p<1e-4
Partial F test: Model 5 vs Model 6: 0.3237
model_data2 <- model_data[!idx_outliers,]
otus_binary_b <- otus_binary[!idx_outliers,]
otus_raw_b <- otus_raw[!idx_outliers,]
otus_root_b <- otus_root[!idx_outliers,]
otus_3root_b <- otus_3root[!idx_outliers,]
otus_4root_b <- otus_4root[!idx_outliers,]
par(mfrow=c(3,2))
plot(multimod6, which = 1:6, pch = 19, main = "Best model before removing outliers" ,
col=cols[(idx_outliers-1)^2+1], cex = 1.5, cex.lab = 1.6, cex.main=1.4, lwd = 2,
cex.id=1.4)
multimod0_b <- lm(model_data2$linear_growth ~ model_data2$sex + model_data2$age + model_data2$time +
model_data2$breastfed + model_data2$country + otus_binary_b$OTU_total)
multimod1_b <- lm(model_data2$linear_growth ~ model_data2$sex + model_data2$age + model_data2$time +
model_data2$breastfed + model_data2$country + otus_root_b$OTU_total + otus_binary_b$OTU_total)
multimod2_b <- lm(model_data2$linear_growth ~ model_data2$sex + model_data2$age + model_data2$time +
model_data2$breastfed + model_data2$country + otus_root_b$OTU_total + otus_binary_b$OTU_total +
otus_raw_b$OTU_total)
multimod3_b <- lm(model_data2$linear_growth ~ model_data2$sex + model_data2$age + model_data2$time +
model_data2$breastfed + model_data2$country + otus_root_b$OTU_67958 + otus_binary_b$OTU_67958)
multimod4_b <- lm(model_data2$linear_growth ~ model_data2$sex + model_data2$age + model_data2$time +
model_data2$breastfed + model_data2$country + otus_root_b$OTU_67958 + otus_binary_b$OTU_67958 +
otus_raw_b$OTU_67958)
multimod5_b <- lm(model_data2$linear_growth ~ model_data2$sex + model_data2$age + model_data2$time +
model_data2$breastfed + model_data2$country + otus_root_b$OTU_67958 + otus_binary_b$OTU_67958 +
otus_raw_b$OTU_67958 + otus_3root_b$OTU_67958)
multimod6_b <- lm(model_data2$linear_growth ~ model_data2$age + otus_root_b$OTU_67958 +
otus_binary_b$OTU_67958 +
otus_raw_b$OTU_67958 + otus_3root_b$OTU_67958 ) ## Best model
multimod7_b <- lm(model_data2$linear_growth ~ model_data2$age + otus_root_b$OTU_67958 +
otus_binary_b$OTU_67958 + otus_3root_b$OTU_67958 )
multimod8_b <- lm(model_data2$linear_growth ~ model_data2$sex + model_data2$age + model_data2$time +
model_data2$breastfed + model_data2$country + otus_binary_b$OTU_67958 +
otus_raw_b$OTU_67958)
multimod9_b <- lm(model_data2$linear_growth ~ model_data2$sex + model_data2$age + model_data2$time +
model_data2$breastfed + model_data2$country + otus_binary_b$OTU_67958)
multimod10_b <- lm(model_data2$linear_growth ~ model_data2$sex + model_data2$age + model_data2$time +
model_data2$breastfed + model_data2$country + otus_raw_b$OTU_67958)
summary(multimod7_b)
##
## Call:
## lm(formula = model_data2$linear_growth ~ model_data2$age + otus_root_b$OTU_67958 +
## otus_binary_b$OTU_67958 + otus_3root_b$OTU_67958)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4480 -0.3267 -0.0020 0.3017 2.7190
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.191e-01 3.823e-02 -8.347 < 2e-16 ***
## model_data2$age 3.336e-04 9.544e-05 3.496 0.00049 ***
## otus_root_b$OTU_67958 2.673e-02 2.346e-01 0.114 0.90929
## otus_binary_b$OTU_67958 1.886e-01 3.203e-01 0.589 0.55614
## otus_3root_b$OTU_67958 -9.671e-02 5.101e-01 -0.190 0.84965
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5558 on 1226 degrees of freedom
## Multiple R-squared: 0.01716, Adjusted R-squared: 0.01395
## F-statistic: 5.352 on 4 and 1226 DF, p-value: 0.0002838
summary(multimod8_b)
##
## Call:
## lm(formula = model_data2$linear_growth ~ model_data2$sex + model_data2$age +
## model_data2$time + model_data2$breastfed + model_data2$country +
## otus_binary_b$OTU_67958 + otus_raw_b$OTU_67958)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.38093 -0.33389 -0.01144 0.29533 2.79960
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.2140267 0.1527081 -1.402 0.161306
## model_data2$sexMale -0.0129703 0.0319718 -0.406 0.685048
## model_data2$age 0.0003988 0.0001081 3.689 0.000235 ***
## model_data2$time -0.0016020 0.0019128 -0.838 0.402462
## model_data2$breastfedexclusively 0.0097166 0.0894581 0.109 0.913525
## model_data2$breastfedpartially 0.0159552 0.0527181 0.303 0.762208
## model_data2$countryKenya -0.0059620 0.0525175 -0.114 0.909633
## model_data2$countryMali -0.1080011 0.0712835 -1.515 0.130008
## model_data2$countryThe Gambia -0.1008688 0.0459252 -2.196 0.028253 *
## otus_binary_b$OTU_67958 0.1006301 0.0536711 1.875 0.061040 .
## otus_raw_b$OTU_67958 -0.0016366 0.0036594 -0.447 0.654800
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5544 on 1220 degrees of freedom
## Multiple R-squared: 0.02675, Adjusted R-squared: 0.01878
## F-statistic: 3.354 on 10 and 1220 DF, p-value: 0.000253
summary(multimod9_b)
##
## Call:
## lm(formula = model_data2$linear_growth ~ model_data2$sex + model_data2$age +
## model_data2$time + model_data2$breastfed + model_data2$country +
## otus_binary_b$OTU_67958)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.38157 -0.33237 -0.01148 0.29599 2.79913
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.2129967 0.1526407 -1.395 0.163145
## model_data2$sexMale -0.0129728 0.0319613 -0.406 0.684893
## model_data2$age 0.0003987 0.0001081 3.689 0.000235 ***
## model_data2$time -0.0016179 0.0019118 -0.846 0.397576
## model_data2$breastfedexclusively 0.0100585 0.0894255 0.112 0.910462
## model_data2$breastfedpartially 0.0158888 0.0527006 0.301 0.763091
## model_data2$countryKenya -0.0060204 0.0525002 -0.115 0.908722
## model_data2$countryMali -0.1102427 0.0710838 -1.551 0.121189
## model_data2$countryThe Gambia -0.1000480 0.0458735 -2.181 0.029377 *
## otus_binary_b$OTU_67958 0.0908123 0.0489599 1.855 0.063862 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5542 on 1221 degrees of freedom
## Multiple R-squared: 0.02659, Adjusted R-squared: 0.01942
## F-statistic: 3.706 on 9 and 1221 DF, p-value: 0.0001334
summary(multimod10_b)
##
## Call:
## lm(formula = model_data2$linear_growth ~ model_data2$sex + model_data2$age +
## model_data2$time + model_data2$breastfed + model_data2$country +
## otus_raw_b$OTU_67958)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.3971 -0.3335 -0.0062 0.2996 2.7874
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.2181423 0.1528495 -1.427 0.1538
## model_data2$sexMale -0.0138770 0.0320010 -0.434 0.6646
## model_data2$age 0.0004386 0.0001061 4.133 3.83e-05 ***
## model_data2$time -0.0016403 0.0019146 -0.857 0.3918
## model_data2$breastfedexclusively 0.0013566 0.0894389 0.015 0.9879
## model_data2$breastfedpartially 0.0101976 0.0526828 0.194 0.8465
## model_data2$countryKenya 0.0074195 0.0520839 0.142 0.8867
## model_data2$countryMali -0.0964357 0.0710893 -1.357 0.1752
## model_data2$countryThe Gambia -0.0924093 0.0457501 -2.020 0.0436 *
## otus_raw_b$OTU_67958 0.0011699 0.0033428 0.350 0.7264
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.555 on 1221 degrees of freedom
## Multiple R-squared: 0.02395, Adjusted R-squared: 0.01675
## F-statistic: 3.329 on 9 and 1221 DF, p-value: 0.000497
# anova(multimod3_b, multimod4_b, test= "LRT") ## Testing raw ==> SIGNIFICANT
# anova(multimod4_b, multimod5_b, test= "LRT") ## Testing root 3 ==> SIGNIFICANT
# anova(multimod3_b, multimod5_b, test= "LRT") ## Testing raw and root 3 ==> SIGNIFICANT
# anova(multimod5_b, multimod6_b, test= "LRT") ## testing root4 ==> NON SIGNIFICANT
# Anova(multimod5_b) ## Testing all terms, best OTU model, but with irrelevant demographic covariates
# anova(multimod6_b, multimod5_b, test= "LRT") ## Testing demographic covariates
# Anova(multimod6_b) ## Best model
# summary(multimod7_b)