#The causal diagram explains the interactive effects of different soil paramters on the microbial biomass carbon.
#Increase in soil organic carbon and available nitrogen contents enhance microbial growth which is in turn positively affect dehydrogenase activity- a indicator of soil microbial profile
#Additionally, higher percentage sand content reflects less good environment for microbial growth in terms of nutrients supplies. Also, increase in electrical conductvity make soil unsuitable for plant and microbial growth due to high salinity which will in turn affect microbial dehydrogenase activity.
#I therefore model the signficance of different predictors of microbial biomass carbon.
#The model is written in the "dot" language
library(DiagrammeR)
Causalmodel <-
"digraph{
rankdir = LR;
Microbial_biomass_carbon -> Soil_Organic_carbon [label='+'];
Microbial_biomass_carbon -> Available_Nitrogen [label='+'];
Microbial_biomass_carbon ->Dehydrogenase_activity[label='+'];
Microbial_biomass_carbon -> Electrical_conductivity [label='-'];
Soil_Organic_carbon-> Electrical_conductivity [label='-'];
Dehydrogenase_activity-> Electrical_conductivity [label='-'];
Microbial_biomass_carbon-> Percent_Sand_content[label='-'];
Microbial_biomass_carbon-> Percent_Clay[label='+']
}"
grViz(Causalmodel)
help("grViz")
## Warning in stats::runif(10): '.Random.seed' is not an integer vector but of
## type 'NULL', so ignored
## starting httpd help server ...
## done
#Lavaan modeling
library(lavaan)
## This is lavaan 0.5-23.1097
## lavaan is BETA software! Please report any bugs.
library(semPlot)
source("lavaan_modavg.R")
getwd()
## [1] "C:/Users/Paul/Documents/Rwork"
t.dat<- read.csv("Microbial biomass data.csv")
# specify "mod.1"
mod.1 <- 'Microbial_biomass_carbon ~ Available_Nitrogen + Soil_organic_carbon + Dehydrogenase_activity + Percent_Sand_content
Dehydrogenase_activity ~ Electrical_conductivity + Microbial_biomass_carbon + Percent_Sand_content'
# Fit "mod.1"
mod.1.fit <- sem(mod.1, data=t.dat)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, :
## lavaan WARNING: some observed variances are (at least) a factor 1000 times
## larger than others; use varTable(fit) to investigate
summary(mod.1.fit,standardized=TRUE)
## lavaan (0.5-23.1097) converged normally after 95 iterations
##
## Number of observations 200
##
## Estimator ML
## Minimum Function Test Statistic 2.395
## Degrees of freedom 2
## P-value (Chi-square) 0.302
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Standard
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## Microbial_biomass_carbon ~
## Availabl_Ntrgn 3.872 2.122 1.825 0.068
## Soil_rgnc_crbn 643.390 254.494 2.528 0.011
## Dhydrgns_ctvty 4.824 38.580 0.125 0.900
## Prcnt_Snd_cntn -3.208 2.185 -1.468 0.142
## Dehydrogenase_activity ~
## Elctrcl_cndctv 0.384 0.957 0.402 0.688
## Mcrbl_bmss_crb 0.002 0.002 0.773 0.440
## Prcnt_Snd_cntn -0.010 0.016 -0.637 0.524
## Std.lv Std.all
##
## 3.872 0.137
## 643.390 0.177
## 4.824 0.039
## -3.208 -0.115
##
## 0.384 0.028
## 0.002 0.233
## -0.010 -0.046
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv
## .Mcrbl_bmss_crb 59702.135 10569.038 5.649 0.000 59702.135
## .Dhydrgns_ctvty 3.855 0.396 9.732 0.000 3.855
## Std.all
## 0.924
## 0.923
#Results show that the minimum test statistics=11.73
#I then added another path to my model in model 2
#Specify "model.2"
mod.2 <- 'Microbial_biomass_carbon ~ Available_Nitrogen + Soil_organic_carbon + Dehydrogenase_activity + Percent_Sand_content
Dehydrogenase_activity ~ Electrical_conductivity + Microbial_biomass_carbon + Percent_Sand_content + Percent_Clay'
#fit mod.2
mod.2.fit <- sem(mod.2, data=t.dat)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, :
## lavaan WARNING: some observed variances are (at least) a factor 1000 times
## larger than others; use varTable(fit) to investigate
summary(mod.2.fit,standardized=TRUE)
## lavaan (0.5-23.1097) converged normally after 94 iterations
##
## Number of observations 200
##
## Estimator ML
## Minimum Function Test Statistic 2.544
## Degrees of freedom 3
## P-value (Chi-square) 0.467
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Standard
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## Microbial_biomass_carbon ~
## Availabl_Ntrgn 3.883 2.115 1.836 0.066
## Soil_rgnc_crbn 644.503 254.554 2.532 0.011
## Dhydrgns_ctvty 3.987 36.542 0.109 0.913
## Prcnt_Snd_cntn -3.224 2.170 -1.486 0.137
## Dehydrogenase_activity ~
## Elctrcl_cndctv 0.226 0.961 0.235 0.814
## Mcrbl_bmss_crb 0.002 0.002 0.845 0.398
## Prcnt_Snd_cntn -0.006 0.016 -0.378 0.705
## Percent_Clay 0.030 0.025 1.179 0.238
## Std.lv Std.all
##
## 3.883 0.138
## 644.503 0.177
## 3.987 0.032
## -3.224 -0.116
##
## 0.226 0.016
## 0.002 0.240
## -0.006 -0.028
## 0.030 0.083
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv
## .Mcrbl_bmss_crb 59894.049 10409.485 5.754 0.000 59894.049
## .Dhydrgns_ctvty 3.826 0.389 9.836 0.000 3.826
## Std.all
## 0.927
## 0.917
#After fitting mode.2, the test statistics increases which was expected to decrease, i now had to revisit my first model to create model 3 by removing two paths
#Specify "model.3"
mod.3 <- 'Microbial_biomass_carbon ~ Available_Nitrogen + Soil_organic_carbon + Dehydrogenase_activity + Percent_Sand_content
Dehydrogenase_activity ~ Electrical_conductivity + Microbial_biomass_carbon'
#fit mod.3
mod.3.fit <- sem(mod.3,data=t.dat)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, :
## lavaan WARNING: some observed variances are (at least) a factor 1000 times
## larger than others; use varTable(fit) to investigate
summary(mod.3.fit)
## lavaan (0.5-23.1097) converged normally after 109 iterations
##
## Number of observations 200
##
## Estimator ML
## Minimum Function Test Statistic 2.792
## Degrees of freedom 3
## P-value (Chi-square) 0.425
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Standard
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## Microbial_biomass_carbon ~
## Availabl_Ntrgn 4.009 2.188 1.832 0.067
## Soil_rgnc_crbn 657.812 263.806 2.494 0.013
## Dhydrgns_ctvty -6.007 38.285 -0.157 0.875
## Prcnt_Snd_cntn -3.414 2.164 -1.578 0.115
## Dehydrogenase_activity ~
## Elctrcl_cndctv 0.274 0.956 0.287 0.774
## Mcrbl_bmss_crb 0.003 0.002 1.115 0.265
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .Mcrbl_bmss_crb 62636.348 13614.993 4.601 0.000
## .Dhydrgns_ctvty 3.865 0.401 9.650 0.000
#Gettin the same ML value
#Next, i tried the model3 again with reduced sample size (from 310 to 200)
#Specify model 4
library(lavaan)
library(semPlot)
getwd()
## [1] "C:/Users/Paul/Documents/Rwork"
t.dat<- read.csv("Microbial biomass data.csv")
mod.4 <- 'Microbial_biomass_carbon ~ Available_Nitrogen + Soil_organic_carbon + Dehydrogenase_activity + Percent_Sand_content
Dehydrogenase_activity ~ Electrical_conductivity + Microbial_biomass_carbon'
#fit mod.4
mod.4.fit <- sem(mod.4,data=t.dat)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, :
## lavaan WARNING: some observed variances are (at least) a factor 1000 times
## larger than others; use varTable(fit) to investigate
summary(mod.4.fit,standardized=T, fit.measures=TRUE)
## lavaan (0.5-23.1097) converged normally after 109 iterations
##
## Number of observations 200
##
## Estimator ML
## Minimum Function Test Statistic 2.792
## Degrees of freedom 3
## P-value (Chi-square) 0.425
##
## Model test baseline model:
##
## Minimum Function Test Statistic 30.789
## Degrees of freedom 9
## P-value 0.000
##
## User model versus baseline model:
##
## Comparative Fit Index (CFI) 1.000
## Tucker-Lewis Index (TLI) 1.029
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -2883.500
## Loglikelihood unrestricted model (H1) -2882.104
##
## Number of free parameters 8
## Akaike (AIC) 5783.000
## Bayesian (BIC) 5809.386
## Sample-size adjusted Bayesian (BIC) 5784.041
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.000
## 90 Percent Confidence Interval 0.000 0.116
## P-value RMSEA <= 0.05 0.620
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.025
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Standard
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## Microbial_biomass_carbon ~
## Availabl_Ntrgn 4.009 2.188 1.832 0.067
## Soil_rgnc_crbn 657.812 263.806 2.494 0.013
## Dhydrgns_ctvty -6.007 38.285 -0.157 0.875
## Prcnt_Snd_cntn -3.414 2.164 -1.578 0.115
## Dehydrogenase_activity ~
## Elctrcl_cndctv 0.274 0.956 0.287 0.774
## Mcrbl_bmss_crb 0.003 0.002 1.115 0.265
## Std.lv Std.all
##
## 4.009 0.142
## 657.812 0.181
## -6.007 -0.048
## -3.414 -0.123
##
## 0.274 0.020
## 0.003 0.318
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv
## .Mcrbl_bmss_crb 62636.348 13614.993 4.601 0.000 62636.348
## .Dhydrgns_ctvty 3.865 0.401 9.650 0.000 3.865
## Std.all
## 0.969
## 0.926
#Model 4 gives a reasonable ML value
#Comparing the modles using their aic values
source("lavaan_modavg.R")
anova(mod.1.fit, mod.2.fit, mod.3.fit,mod.4.fit)
## Warning in lavTestLRT(object = <S4 object of class structure("lavaan",
## package = "lavaan")>, : lavaan WARNING: some models are based on a
## different set of observed variables
## Chi Square Difference Test
##
## Df AIC BIC Chisq Chisq diff Df diff Pr(>Chisq)
## mod.1.fit 2 5784.6 5814.3 2.3950
## mod.2.fit 3 7031.9 7064.9 2.5442 0.14919 1 0.6993
## mod.3.fit 3 5783.0 5809.4 2.7920 0.24778 0 <2e-16 ***
## mod.4.fit 3 5783.0 5809.4 2.7920 0.00000 0 1.0000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
aictab.lavaan(list(mod.1.fit, mod.2.fit,mod.3.fit,mod.4.fit), c("mod.1", "mod.2","mod.3", "mod.4"))
## Warning in aictab.lavaan(list(mod.1.fit, mod.2.fit, mod.3.fit, mod.4.fit), :
## Check model structure carefully as some models may be redundant
##
## Model selection based on AICc:
##
## K AICc Delta_AICc AICcWt Cum.Wt LL
## mod.4 8 5783.43 0.00 0.41 0.41 -2883.50
## mod.3 8 5783.43 0.00 0.41 0.83 -2883.50
## mod.1 9 5785.19 1.75 0.17 1.00 -2883.30
## mod.2 10 7032.70 1249.26 0.00 1.00 -3505.97
# Results from comparing the models showed that mod.4 best described the relationship
fitMeasures(mod.4.fit)
## npar fmin chisq
## 8.000 0.007 2.792
## df pvalue baseline.chisq
## 3.000 0.425 30.789
## baseline.df baseline.pvalue cfi
## 9.000 0.000 1.000
## tli nnfi rfi
## 1.029 1.029 0.728
## nfi pnfi ifi
## 0.909 0.303 1.007
## rni logl unrestricted.logl
## 1.010 -2883.500 -2882.104
## aic bic ntotal
## 5783.000 5809.386 200.000
## bic2 rmsea rmsea.ci.lower
## 5784.041 0.000 0.000
## rmsea.ci.upper rmsea.pvalue rmr
## 0.116 0.620 3.051
## rmr_nomean srmr srmr_bentler
## 3.051 0.025 0.025
## srmr_bentler_nomean srmr_bollen srmr_bollen_nomean
## 0.025 0.025 0.025
## srmr_mplus srmr_mplus_nomean cn_05
## 0.025 0.025 560.800
## cn_01 gfi agfi
## 813.678 0.986 0.904
## pgfi mfi ecvi
## 0.141 1.001 0.094
semPaths(mod.4.fit,whatLabels="par",layout="spring")
## Warning in qgraph(Edgelist, labels = nLab, bidirectional = Bidir, directed
## = Directed, : The following arguments are not documented and likely not
## arguments of qgraph and thus ignored: loopRotation; residuals; residScale;
## residEdge; CircleEdgeEnd

#The fitted model shows that microbial biomass carbon is primarily determined by the amount of organic carbon which is a source of energy for microbes and for enzyme production, available nitrogen is alos so to very important to microbes as its plays key role in protein sysnthesis. In addition, percentage sand content shows the fertility of the soil which in turn determines primary production with a feedback loop to microbial biomass carbon via the amount of nutrients available in the soil. Furthermore, dehydrogenase activity is largely detetmined by microbial biomass and soil electrical conductivity (relating to soil salinity)