GLV

Problem 1

package and function load

library(igraph)
library(seqtime)
library(reshape2)
library(ggplot2)
K <- 20 # the number of species
S <- 50 # sample size

# Simulate the A interaction matrix from uniform distribution
getA <- function(tmp) { 
  tmp <- as.matrix(as_adjacency_matrix(tmp, sparse=FALSE))
  K <- nrow(tmp)
  A <- matrix(0, K, K)
  idx <- which(tmp[upper.tri(tmp)]==1)
  A[upper.tri(A)][idx] <- rnorm(length(idx), 0, 0.5)
  A <- A + t(A)
  # make LD-stable
  l1 <- max(eigen(A, only.values = TRUE, symmetric = TRUE)$values)
  if (l1 > 0) diag(A) <- diag(A) - l1 * 1.01
  A
}

Erdös- Renyi model

# generate the network
set.seed(1)
tmp1 <- sample_gnp(n = K, 0.1) ## Erdös- Renyi model
plot(tmp1)

# generate the interaction weights on the network
A1 <- getA(tmp1)
plotA(A1)

## [1] "Largest value: 0.657484987490467"
## [1] "Smallest value: -0.80087906712092"

# simualte the data using GLV
data1 <- generateDataSet(S, A1)
data1_1 <- melt(normalize(data1))
colnames(data1_1) <- c("Species", "Sample", "Abundance")
ggplot(data=data1_1, aes(x=Sample, y=Abundance, width=1)) + 
  geom_bar(aes(y = Abundance, x= Sample, fill=Species),stat="identity",show.legend=F) + 
  theme(aspect.ratio=.4) + theme_classic()+ labs(y="Relative abundance",x="Sample")

Watts-Strogatz model

set.seed(2)
tmp2 <- sample_smallworld(1, size = K, nei = 2, p = 0.1) ## Watts-Strogatz model
plot(tmp2)

A2 <- getA(tmp2)
plotA(A2)

## [1] "Largest value: 1.1737168881135"
## [1] "Smallest value: -1.67168511826455"

data2 <- generateDataSet(S, A2)
data2 <- melt(normalize(data2))
colnames(data2) <- c("Species", "Sample", "Abundance")
ggplot(data=data2, aes(x=Sample, y=Abundance, width=1)) + 
  geom_bar(aes(y = Abundance, x= Sample,fill=Species),stat="identity",show.legend=F) + 
  theme(aspect.ratio=.4) + theme_classic()+ labs(y="Relative abundance",x="Sample")

Barabasi-Albert model

set.seed(3)
tmp3 <- sample_pa(n = K, m=3, directed = F) ## Barabasi-Albert model
plot(tmp3)

A3 <- getA(tmp3)
plotA(A3)

## [1] "Largest value: 0.716370956389845"
## [1] "Smallest value: -2.4468582482149"

data3 <- generateDataSet(S, A3)
data3 <- melt(normalize(data3))
colnames(data3) <- c("Species", "Sample", "Abundance")
ggplot(data=data3, aes(x=Sample, y=Abundance, width=1)) + 
  geom_bar(aes(y = Abundance, x= Sample,fill=Species),stat="identity",show.legend=F) + 
  theme(aspect.ratio=.4) + theme_classic()+ labs(y="Relative abundance",x="Sample")

Klemm-Eguiluz model

A4 <- generateA(K, "klemm", pep=10, c =0.05)

## [1] "Adjusting connectance to 0.05"
## [1] "Initial edge number 190"
## [1] "Initial connectance 0.447368421052632"
## [1] "Number of edges removed 151"
## [1] "Final connectance 0.05"
## [1] "Final connectance: 0.05"
## [1] "Initial edge number 39"
## [1] "Initial connectance 0.05"
## [1] "Number of negative edges already present: 20"
## [1] "Converting 15 edges into negative edges"
## [1] "Final connectance: 0.05"
## [1] "Final arc number (excluding self-arcs) 19"
## [1] "Final negative arc number (excluding self-arcs) 15"
## [1] "PEP: 21.0526315789474"

plotA(A4, header="Klemm-Eguiluz interaction matrix")

## [1] "Largest value: 0.194245928921739"
## [1] "Smallest value: -0.5"

data4 = generateDataSet(S, A4)
data4 = melt(normalize(data4))
colnames(data4) = c("Species", "Sample", "Abundance")
ggplot(data=data4, aes(x=Sample, y=Abundance, width=1)) + 
  geom_bar(aes(y = Abundance, x= Sample, fill=Species),stat="identity",show.legend=F) + 
  theme(aspect.ratio=.4) + theme_classic()+ labs(y="Relative abundance",x="Sample")

Problem 2

cor_spearman <- cor(t(data1), method = 'spearman')
pheatmap::pheatmap(cor_spearman, border_color = NA, 
                   color = colorRampPalette(c("navy", "white", "firebrick3"))(50),
                   display_numbers = T, number_color = 'white', fontsize_number=7)

resampling to calculate the p-value for the correlation

null_cor <- list()
tmp <- cor_spearman
for(i in 1:1000){
  for(j in 1:20) {
    idx <- c(1:20)[-j]
    tmp[j, idx] <- sapply(idx, function(x) cor(data1[j, ], sample(data1[x, ]),method='spearman'))
  }
  null_cor[[i]] <- tmp
}

pv_cor <- matrix(0, K, K)
for(i in 1:1000){
  pv_cor <- pv_cor + (cor_spearman>null_cor[[i]])
}
pv_cor <- pv_cor/1000
diag(pv_cor) <- NA
pv_cor.adj <- matrix(p.adjust(c(pv_cor), method = 'BH'), K, K)
round(pv_cor.adj, 3)

##        [,1]  [,2]  [,3]  [,4]  [,5]  [,6]  [,7]  [,8]  [,9] [,10] [,11] [,12]
##  [1,]    NA 0.000 0.000 0.000 0.000 1.000 0.819 0.772 0.819 0.874 1.000 0.819
##  [2,] 0.000    NA 1.000 1.000 1.000 0.000 1.000 1.000 1.000 1.000 0.793 0.205
##  [3,] 0.000 1.000    NA 1.000 1.000 0.000 0.952 0.807 0.986 0.565 0.709 0.825
##  [4,] 0.000 1.000 1.000    NA 1.000 0.000 1.000 0.632 1.000 1.000 0.793 0.709
##  [5,] 0.000 1.000 1.000 1.000    NA 0.000 1.000 0.793 1.000 0.986 0.680 0.737
##  [6,] 1.000 0.000 0.000 0.007 0.000    NA 0.871 0.840 0.819 0.846 1.000 0.931
##  [7,] 0.836 1.000 0.979 1.000 1.000 0.866    NA 1.000 1.000 1.000 0.642 0.000
##  [8,] 0.772 1.000 0.793 0.655 0.793 0.827 1.000    NA 1.000 1.000 0.793 0.877
##  [9,] 0.793 1.000 0.986 1.000 1.000 0.819 1.000 1.000    NA 1.000 0.709 0.000
## [10,] 0.836 1.000 0.568 1.000 0.986 0.836 1.000 1.000 1.000    NA 1.000 0.626
## [11,] 1.000 0.819 0.709 0.793 0.709 1.000 0.642 0.793 0.709 1.000    NA 1.000
## [12,] 0.819 0.253 0.825 0.708 0.772 0.922 0.000 0.903 0.000 0.629 1.000    NA
## [13,] 1.000 0.922 0.793 1.000 0.793 1.000 0.819 1.000 0.819 0.781 1.000 0.060
## [14,] 0.000 1.000 1.000 1.000 1.000 0.000 0.642 0.836 0.793 0.642 0.793 0.819
## [15,] 1.000 0.000 0.000 0.000 0.000 1.000 0.819 0.875 0.793 0.793 1.000 1.000
## [16,] 1.000 0.772 0.207 0.793 0.836 1.000 0.874 0.618 0.877 1.000 1.000 0.642
## [17,] 0.000 1.000 1.000 1.000 1.000 0.000 0.793 0.819 0.884 0.793 0.793 0.709
## [18,] 1.000 0.000 0.000 0.000 0.000 1.000 0.723 0.836 0.723 0.470 1.000 0.949
## [19,] 0.253 1.000 1.000 0.988 1.000 0.172 1.000 0.875 1.000 1.000 0.737 0.416
## [20,] 0.629 1.000 1.000 1.000 1.000 0.626 1.000 1.000 1.000 1.000 0.202 0.000
##       [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20]
##  [1,] 1.000 0.000 1.000 1.000 0.000 1.000 0.253 0.632
##  [2,] 0.930 1.000 0.000 0.793 1.000 0.000 1.000 1.000
##  [3,] 0.793 1.000 0.000 0.209 1.000 0.000 1.000 1.000
##  [4,] 1.000 1.000 0.000 0.793 1.000 0.000 0.986 1.000
##  [5,] 0.793 1.000 0.000 0.843 1.000 0.000 1.000 1.000
##  [6,] 1.000 0.000 1.000 1.000 0.000 1.000 0.175 0.607
##  [7,] 0.819 0.642 0.819 0.853 0.793 0.709 1.000 1.000
##  [8,] 0.986 0.819 0.874 0.526 0.807 0.840 0.858 1.000
##  [9,] 0.819 0.793 0.793 0.874 0.882 0.737 1.000 1.000
## [10,] 0.781 0.676 0.793 1.000 0.781 0.466 1.000 1.000
## [11,] 1.000 0.793 1.000 1.000 0.793 1.000 0.747 0.143
## [12,] 0.054 0.816 1.000 0.708 0.709 0.968 0.349 0.000
## [13,]    NA 1.000 1.000 1.000 0.986 1.000 0.841 0.949
## [14,] 1.000    NA 0.000 0.781 1.000 0.000 1.000 0.709
## [15,] 1.000 0.000    NA 1.000 0.000 1.000 0.077 0.539
## [16,] 1.000 0.793 1.000    NA 0.642 1.000 1.000 0.775
## [17,] 1.000 1.000 0.000 0.642    NA 0.000 1.000 0.836
## [18,] 1.000 0.000 1.000 1.000 0.000    NA 0.127 0.207
## [19,] 0.853 1.000 0.072 1.000 1.000 0.054    NA 1.000
## [20,] 0.922 0.737 0.479 0.772 0.819 0.207 1.000    NA

check by sparCC

library(SpiecEasi)
sparcc_res <- sparcc(t(data1))
round(sparcc_res$Cor, 3)

##         [,1]   [,2]   [,3]   [,4]   [,5]   [,6]   [,7]   [,8]   [,9]  [,10]
##  [1,]  1.000 -0.056 -0.002 -0.065 -0.047  0.498  0.093 -0.005  0.024  0.011
##  [2,] -0.056  1.000  0.057  0.057  0.058 -0.093 -0.007  0.027 -0.096  0.042
##  [3,] -0.002  0.057  1.000  0.064 -0.044 -0.023 -0.047  0.033  0.048 -0.024
##  [4,] -0.065  0.057  0.064  1.000  0.034 -0.061  0.041  0.001  0.018 -0.038
##  [5,] -0.047  0.058 -0.044  0.034  1.000 -0.024 -0.040 -0.029  0.010  0.013
##  [6,]  0.498 -0.093 -0.023 -0.061 -0.024  1.000  0.097  0.099  0.103  0.075
##  [7,]  0.093 -0.007 -0.047  0.041 -0.040  0.097  1.000 -0.031  0.192  0.042
##  [8,] -0.005  0.027  0.033  0.001 -0.029  0.099 -0.031  1.000 -0.008  0.009
##  [9,]  0.024 -0.096  0.048  0.018  0.010  0.103  0.192 -0.008  1.000 -0.127
## [10,]  0.011  0.042 -0.024 -0.038  0.013  0.075  0.042  0.009 -0.127  1.000
## [11,]  0.077  0.005 -0.025 -0.019  0.003  0.057 -0.098 -0.055 -0.128 -0.070
## [12,]  0.103  0.018 -0.039 -0.038  0.008  0.050 -0.077 -0.010 -0.110 -0.045
## [13,]  0.041 -0.060 -0.040  0.018 -0.106  0.077 -0.111 -0.083 -0.184 -0.024
## [14,] -0.109  0.181  0.021  0.090  0.098 -0.174 -0.084  0.040 -0.027  0.037
## [15,]  0.477 -0.119 -0.008 -0.051 -0.033  0.658  0.071  0.053  0.042  0.064
## [16,]  0.042 -0.003 -0.013  0.029  0.031  0.081 -0.033 -0.093 -0.061  0.009
## [17,] -0.163  0.167  0.080  0.070  0.034 -0.240  0.013  0.105  0.062  0.033
## [18,]  0.253 -0.084 -0.023 -0.052 -0.058  0.438 -0.011  0.009 -0.062  0.045
## [19,] -0.042  0.019 -0.042 -0.023  0.019 -0.002 -0.004 -0.049  0.023  0.025
## [20,]  0.058 -0.008 -0.006  0.040 -0.019  0.060  0.053 -0.075  0.097 -0.036
##        [,11]  [,12]  [,13]  [,14]  [,15]  [,16]  [,17]  [,18]  [,19]  [,20]
##  [1,]  0.077  0.103  0.041 -0.109  0.477  0.042 -0.163  0.253 -0.042  0.058
##  [2,]  0.005  0.018 -0.060  0.181 -0.119 -0.003  0.167 -0.084  0.019 -0.008
##  [3,] -0.025 -0.039 -0.040  0.021 -0.008 -0.013  0.080 -0.023 -0.042 -0.006
##  [4,] -0.019 -0.038  0.018  0.090 -0.051  0.029  0.070 -0.052 -0.023  0.040
##  [5,]  0.003  0.008 -0.106  0.098 -0.033  0.031  0.034 -0.058  0.019 -0.019
##  [6,]  0.057  0.050  0.077 -0.174  0.658  0.081 -0.240  0.438 -0.002  0.060
##  [7,] -0.098 -0.077 -0.111 -0.084  0.071 -0.033  0.013 -0.011 -0.004  0.053
##  [8,] -0.055 -0.010 -0.083  0.040  0.053 -0.093  0.105  0.009 -0.049 -0.075
##  [9,] -0.128 -0.110 -0.184 -0.027  0.042 -0.061  0.062 -0.062  0.023  0.097
## [10,] -0.070 -0.045 -0.024  0.037  0.064  0.009  0.033  0.045  0.025 -0.036
## [11,]  1.000  0.007  0.002 -0.001  0.068 -0.087  0.044 -0.004  0.022 -0.071
## [12,]  0.007  1.000 -0.044 -0.085  0.102 -0.002 -0.025  0.059 -0.007 -0.063
## [13,]  0.002 -0.044  1.000  0.026  0.124 -0.005  0.028  0.110  0.026 -0.049
## [14,] -0.001 -0.085  0.026  1.000 -0.163  0.010  0.414 -0.060  0.013  0.020
## [15,]  0.068  0.102  0.124 -0.163  1.000  0.071 -0.225  0.377 -0.045 -0.016
## [16,] -0.087 -0.002 -0.005  0.010  0.071  1.000  0.020 -0.026 -0.091 -0.102
## [17,]  0.044 -0.025  0.028  0.414 -0.225  0.020  1.000 -0.076  0.186  0.023
## [18,] -0.004  0.059  0.110 -0.060  0.377 -0.026 -0.076  1.000 -0.034 -0.043
## [19,]  0.022 -0.007  0.026  0.013 -0.045 -0.091  0.186 -0.034  1.000  0.008
## [20,] -0.071 -0.063 -0.049  0.020 -0.016 -0.102  0.023 -0.043  0.008  1.000

Compare to the original, spearman, and sparCC

## set size of vertex proportional to clr-mean
vsize    <- rowMeans(clr(t(data1), 1))+6
am.coord <- layout.fruchterman.reingold(tmp1)

spear.graph <- pv_cor.adj<0.05
ig.spear <- adj2igraph(spear.graph) ## Create igraph objects

sparcc.graph <- abs(sparcc_res$Cor) >= 0.1
diag(sparcc.graph) <- 0
ig.sparcc <- adj2igraph(sparcc.graph) 

par(mfrow=c(1,3))
plot(tmp1, layout=am.coord, vertex.size=vsize, vertex.label=NA, main="Original")
plot(ig.spear, layout=am.coord, vertex.size=vsize, vertex.label=NA, main="Spearman")
plot(ig.sparcc, layout=am.coord, vertex.size=vsize, vertex.label=NA, main="sparCC")

Problem 3

calculate the Sensitivity and Specificity, interaction network is A1, co-occurrence network is cor_spearman_sig

cor_spearman_sig <- cor_spearman
cor_spearman_sig[pv_cor.adj> 0.05] <- 0
diag(cor_spearman_sig) <- diag(A1) <- NA

TP <- which(cor_spearman_sig*A1>0)
FP <- setdiff(which(cor_spearman_sig!=0), which(A1!=0))
FN <- intersect(which(A1!=0), which(cor_spearman_sig==0))
TN <- intersect(which(cor_spearman_sig==0), which(A1==0))
(Sensitivity <- length(TP)/(length(TP)+length(FN)))

## [1] 0.3529412

(Specificity <- length(TN)/(length(TN)+length(FP)))

## [1] 0.8786127

Problem 4

a brute-force leave-one-out strategy

set.seed(1)
tmp1 <- sample_gnp(n = K, 0.1) 
A1 <- getA(tmp1)
rich_0 <- apply(data1, 2, function(x) sum(round(x)>0))
rich_loo <- matrix(0, 20, 50)
for(i in 1:20) {
  A0 <- A1[-i, ]
  A0 <- A0[, -i]
  tmp <- generateDataSet(S, A0)
  rich_loo[i, ] <- apply(tmp, 2, function(x) sum(round(x)>0))
}

no strong patterns for keystone species in the interaction network

ks <- rich_0[1] - rich_loo[1:20, 1] # one sample
ks

##  [1] 7 5 6 2 6 5 4 4 5 2 6 7 7 5 4 5 5 2 8 5

ks.ave <- colMeans(rich_0 - t(rich_loo))
ks.ave

##  [1] 4.40 4.50 5.04 4.42 4.42 4.44 4.32 4.90 4.60 4.50 4.50 5.84 6.64 5.00 4.66
## [16] 5.42 6.06 3.96 4.56 4.22

LDA analysis

topoX <- data.frame(y=ifelse(ks.ave < median(ks.ave), 1, 0),
                    degree=degree(tmp1), 
                    BC = betweenness(tmp1),
                    CC = closeness(tmp1),
                    TR = transitivity(tmp1))
# TR constant
topoX <- topoX[, 1:4]
set.seed(1)
train <- sample(1:20, 10)
topoX.train <- topoX[train, ]
topoX.test <- topoX[-train, ]
lda.train <- MASS::lda(y~., data=topoX.train)
lda.pred <- predict(lda.train, data=topoX.test)
mean(topoX.test$y == lda.pred$class) # Accuarcy

## [1] 0.6

Problem 5, construct real data co-occurrence network

gdata <- readr::read_csv("GLV/gdata.csv")
group <- read.table('GLV/group.txt')
VFA <- readxl::read_excel("GLV/VFA汇总.xlsx")
VFA <- data.frame(VFA)
rownames(VFA) <- VFA[,1]
VFA <- VFA[, -1]
# remove rare species
idx <- apply(gdata[,-1], 1, function(x) sum(x>0))
gdata <- gdata[idx > 0.2*461, ]
gdata <- as.data.frame(gdata)
rownames(gdata) <- gdata$g
gdata <- gdata[, -1]
dim(gdata)

## [1]  69 461

tmp <- t(sapply(colnames(gdata), function(x) unlist(strsplit(as.character(x), split = '-'))))
group <- data.frame(id = colnames(gdata), site=tmp[,1], day=tmp[,2], time=tmp[,3])
head(group)

##                id site day time
## 005-1-1   005-1-1  005   1    1
## 005-1-2   005-1-2  005   1    2
## 005-1-3   005-1-3  005   1    3
## 005-1-4   005-1-4  005   1    4
## 005-10-1 005-10-1  005  10    1
## 005-10-2 005-10-2  005  10    2

Spearman

cor_spearman <- cor(t(gdata), method = 'spearman')
# diag(cor_spearman) <- NA
pheatmap::pheatmap(cor_spearman, border_color = NA, 
                    color = colorRampPalette(c("navy", "white", "firebrick3"))(50))

null_cor <- list()
tmp <- cor_spearman
K <- nrow(gdata)
gdata <- as.matrix(gdata)
for(i in 1:1000){
  for(j in 1:K) {
    idx <- c(1:K)[-j]
    tmp[j, idx] <-sapply(idx, function(x) cor(gdata[j, ], sample(gdata[x, ]),method = 'spearman'))
  }
  if(i%%100==0) cat(i, '-done!\n')
  null_cor[[i]] <- tmp
}
save(null_cor, file='GLV/null_cor_spearman.rda')

load('GLV/null_cor_spearman.rda')
K <- nrow(cor_spearman)
pv_cor <- matrix(0, K, K)
for(i in 1:1000){
  diag(null_cor[[i]]) <- 0
  pv_cor <- pv_cor + (cor_spearman > null_cor[[i]])
}
pv_cor <- pv_cor/1000
diag(pv_cor) <- NA
pv_cor.adj <- matrix(p.adjust(c(pv_cor), method = 'BH'), K, K)
spear.graph <- pv_cor.adj<0.05
ig.spear <- adj2igraph(spear.graph) ## Create igraph objects
plot(ig.spear, main="spearman", vertex.size = rowMeans(clr(t(gdata), 1))+10)

Sparse multiple regression

library(glmnet)
cor_reg <- matrix(0, K, K)
idx <- sample(1:461, 461*0.7)
gdata_train <- t(gdata[, idx])
gdata_test <- t(gdata[, -idx])
for(i in 1:K) {
  tmp <- cv.glmnet(x = gdata_train[, -i], y = gdata_train[, i], nfolds = 5)
  tmp2 <- glmnet(x = gdata_test[, -i], y = gdata_test[, i], lambda = tmp$lambda.min)
  cor_reg[i, -i] <- as.numeric(tmp2$beta)
}
save(cor_reg, file = 'GLV/cor_reg.rda')

load('GLV/cor_reg.rda')
cor_reg.sig <- cor_reg!=0
ig.smr <- adj2igraph(cor_reg.sig) ## Create igraph objects
plot(ig.smr, main="Sparse multiple regression", vertex.size = rowMeans(clr(t(gdata), 1))+10)

Problem 6

based on spearman network

A1 <- cor_spearman
diag(A1) <- 1
A1[!spear.graph] <- 0
l1 <- max(eigen(A1, only.values = TRUE, symmetric = TRUE)$values)
if (l1 > 0) diag(A1) <- diag(A1) - l1 * 1.01
 
rich_0 <- apply(gdata, 1, function(x) sum(x>0))
rich_loo <- matrix(0, K, N)
for(i in 1:K) {
  A0 <- A1[-i, ]
  A0 <- A0[, -i]
  tmp <- generateDataSet(N, A0)
  rich_loo[i, ] <- apply(tmp, 2, function(x) sum(x>0.0001))
}
save(rich_loo, file='P6_rich_loo.rda')

rich_0 <- apply(gdata, 1, function(x) sum(x>0))
load('P6_rich_loo.rda')
ks.ave <- colMeans(rich_0 - t(rich_loo))
ks.ave

##  [1] 259.5445 260.6790 262.6768 259.9111 260.7354 263.6009 259.5054 260.8200
##  [9] 263.7939 259.5271 261.2430 263.2148 260.0521 260.5510 263.1692 260.0976
## [17] 261.2430 262.0846 260.8134 260.6876 261.6876 261.9588 260.5835 261.0347
## [25] 262.6182 260.6985 260.4577 263.3102 260.1106 260.8980 263.6551 259.0998
## [33] 261.5466 262.9046 260.3015 261.0586 263.1518 259.6573 261.3145 261.9154
## [41] 260.6161 260.7549 261.5401 261.3406 260.9111 260.8872 262.6226 260.5315
## [49] 260.0933 263.2148 260.1714 260.7852 263.6443 258.8872 261.5358 262.5510
## [57] 260.2495 260.6421 263.7809 259.5011 261.3319 262.1605 260.5163 260.6399
## [65] 261.6941 261.1258 260.9154 261.2907 262.0304

tmp <- degree(ig.spear)
gdata_sem <- data.frame(t(gdata[tmp>26, ]))
gdata_sem <- cbind(gdata_sem, group[rownames(gdata_sem),])
gdata_sem <- cbind(gdata_sem, VFA[colnames(gdata), 1:8])
colnames(gdata_sem)

##  [1] "Candidatus_Stoquefichus"           "g_unclassified_f__Lachnospiraceae"
##  [3] "Monoglobus"                        "Prevotella"                       
##  [5] "Succinivibrio"                     "id"                               
##  [7] "site"                              "day"                              
##  [9] "time"                              "Acetate"                          
## [11] "Propionate"                        "Isobutyrate"                      
## [13] "Butyrate"                          "Isovalerate"                      
## [15] "Valerate"                          "TVFA"                             
## [17] "A.P"

gdata_sem$site <- as.numeric(gdata_sem$site)
gdata_sem$day <- as.numeric(gdata_sem$day)
gdata_sem$time <- as.numeric(gdata_sem$time)

library ("lavaan") 
model <- 'f1 =~ Candidatus_Stoquefichus + g_unclassified_f__Lachnospiraceae + Monoglobus + Prevotella + Prevotella + Succinivibrio
f2 =~ Acetate + Propionate + Isobutyrate + Butyrate + Isovalerate + Valerate + A.P
f3 =~ site + day + time
f2~f1+f3
f1~~f2
f1~~f3
f2~~f3
'
fit <- sem(model, data = gdata_sem)

summary(fit, fit.measures=T)

## lavaan 0.6-11 did NOT end normally after 761 iterations
## ** WARNING ** Estimates below are most likely unreliable
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        35
##                                                       
##                                                   Used       Total
##   Number of observations                           444         461
##                                                                   
## Model Test User Model:
##                                                       
##   Test statistic                                    NA
##   Degrees of freedom                                NA
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   f1 =~                                               
##     Cnddts_Stqfchs    1.000                           
##     g_nclssfd_f__L   57.624       NA                  
##     Monoglobus      -12.926       NA                  
##     Prevotella       23.440       NA                  
##     Succinivibrio    21.492       NA                  
##   f2 =~                                               
##     Acetate           1.000                           
##     Propionate        0.177       NA                  
##     Isobutyrate       0.000       NA                  
##     Butyrate          0.171       NA                  
##     Isovalerate       0.001       NA                  
##     Valerate          0.006       NA                  
##     A.P               0.014       NA                  
##   f3 =~                                               
##     site              1.000                           
##     day               0.399       NA                  
##     time              0.024       NA                  
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   f2 ~                                                
##     f1                0.774       NA                  
##     f3              -92.213       NA                  
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   f1 ~~                                               
##    .f2               -0.022       NA                  
##     f3               -0.000       NA                  
##  .f2 ~~                                               
##     f3               12.333       NA                  
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .Cnddts_Stqfchs    0.000       NA                  
##    .g_nclssfd_f__L    0.001       NA                  
##    .Monoglobus        0.000       NA                  
##    .Prevotella        0.001       NA                  
##    .Succinivibrio     0.001       NA                  
##    .Acetate          79.120       NA                  
##    .Propionate        2.737       NA                  
##    .Isobutyrate       0.058       NA                  
##    .Butyrate          1.695       NA                  
##    .Isovalerate       0.070       NA                  
##    .Valerate          0.079       NA                  
##    .A.P               1.236       NA                  
##    .site             10.993       NA                  
##    .day               7.938       NA                  
##    .time              1.249       NA                  
##     f1                0.000       NA                  
##    .f2              -21.682       NA                  
##     f3                0.295       NA

# fitMeasures(fit,c("chisq","df","pvalue","gfi","cfi","rmr","srmr","rmsea")) model did not converge

library(semPlots) # I cann't install
semPaths(fit)

GLV

XX

7/1/2022

Problem 1

Problem 2

Problem 3

Problem 4

Problem 5, construct real data co-occurrence network

Problem 6