I have observed that even if tergmLite::simulate_network
is supposed to be a clone of tergm::simulate.stergm
, in practice their fitted outputs differ.
tergm::simulate.stergm
(which EpiModel::netdx()
is based on) seems to give more as expected results than the tergmLite
version that seems to be biased.
I plan to use tergmLite
for my simulation study because it is much less time consuming than the formal tergm
version.
Here is a reproducible example which shows from a simple network model fitted with EpiModel::netest()
function.
This network has 5966 nodes. This is a purely heterosexual model.
The formation formula :
~ edges + nodefactor("ageg.id") + nodematch("sex.id", diff = FALSE)
The dissolution formula:
~offset(edges)
The following document is separarte into 3 parts:
tergm
tergmLite
tergmLite
with a corrective offset (commented here)rm(list = ls())
# ## instal the latest version of EpiModel amd termLite
# devtools::install_github('statnet/ergm')
# devtools::install_github('statnet/tergm')
# devtools::install_github('statnet/EpiModel')
# devtools::install_github('statnet/tergmLite')
suppressPackageStartupMessages(library(EpiModel))
suppressPackageStartupMessages(library(tergmLite))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(stringr))
suppressPackageStartupMessages(library(ggplot2))
sessionInfo()
## R version 3.4.3 (2017-11-30)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: Fedora 27 (Workstation Edition)
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so
##
## locale:
## [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8
## [5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8
## [7] LC_PAPER=en_GB.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggplot2_3.1.0 stringr_1.3.1 tidyr_0.8.1
## [4] dplyr_0.7.7 tergmLite_1.1.5 EpiModel_1.6.6
## [7] tergm_3.6.0-1657 ergm_3.10.0-4684 networkDynamic_0.9.0
## [10] network_1.14-355 deSolve_1.21
##
## loaded via a namespace (and not attached):
## [1] tidyselect_0.2.5 lpSolve_5.6.13 purrr_0.2.5
## [4] lattice_0.20-35 colorspace_1.3-2 htmltools_0.3.6
## [7] yaml_2.2.0 rlang_0.3.0.1 pillar_1.3.0
## [10] withr_2.1.2 glue_1.3.0 RColorBrewer_1.1-2
## [13] bindrcpp_0.2.2 trust_0.1-7 foreach_1.4.4
## [16] bindr_0.1.1 plyr_1.8.4 robustbase_0.93-3
## [19] munsell_0.5.0 gtable_0.2.0 codetools_0.2-15
## [22] coda_0.19-2 evaluate_0.11 knitr_1.20
## [25] doParallel_1.0.14 parallel_3.4.3 DEoptimR_1.0-8
## [28] Rcpp_0.12.19 backports_1.1.2 scales_1.0.0
## [31] digest_0.6.18 stringi_1.2.4 grid_3.4.3
## [34] rprojroot_1.3-2 tools_3.4.3 magrittr_1.5
## [37] lazyeval_0.2.1 tibble_1.4.2 crayon_1.3.4
## [40] ape_5.2 pkgconfig_2.0.2 MASS_7.3-47
## [43] Matrix_1.2-12 assertthat_0.2.0 rmarkdown_1.10
## [46] iterators_1.0.10 statnet.common_4.1.4 R6_2.3.0
## [49] nlme_3.1-131 compiler_3.4.3
## load the netest output object
path.to.fit <- '/mnt/data/georgesd/_PROJECTS/_mIARC/_workdir/'
(load(file.path(path.to.fit, 'fit.m.rda')))
## [1] "fit.m"
fit.m
## EpiModel Network Estimation
## =======================
## Model class: netest
## Estimation Method: ERGM with Edges Approximation
##
## Model Form
## -----------------------
## Formation: ~edges + nodefactor("ageg.id") + nodematch("sex.id", diff = FALSE)
## Target Statistics: 1347 280 545 674.5 570 383 205 15.5 0
## Constraints: ~bd(maxout = 2)
##
## Dissolution: ~offset(edges)
## Target Statistics: 734.1257
summary(fit.m)
##
## ==========================
## Summary of model fit
## ==========================
##
## Formula: nw ~ edges + nodefactor("ageg.id") + nodematch("sex.id", diff = FALSE)
## <environment: 0x55b519a84d78>
##
## Iterations: 86 out of 250
##
## Monte Carlo MLE Results:
## Estimate Std. Error MCMC % z value Pr(>|z|)
## edges -15.7319 0.4412 0 -35.657 <1e-04 ***
## nodefactor.ageg.id.2 3.0169 0.2330 0 12.947 <1e-04 ***
## nodefactor.ageg.id.3 3.9329 0.2246 0 17.511 <1e-04 ***
## nodefactor.ageg.id.4 4.1969 0.2246 0 18.689 <1e-04 ***
## nodefactor.ageg.id.5 4.1103 0.2274 0 18.079 <1e-04 ***
## nodefactor.ageg.id.6 4.0416 0.2291 0 17.643 <1e-04 ***
## nodefactor.ageg.id.7 3.9673 0.2369 0 16.748 <1e-04 ***
## nodefactor.ageg.id.8 3.4217 0.3427 0 9.985 <1e-04 ***
## nodematch.sex.id -Inf 0.0000 0 -Inf <1e-04 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log-likelihood was not estimated for this fit.
## To get deviances, AIC, and/or BIC from fit `object$fit` run
## > object$fit<-logLik(object$fit, add=TRUE)
## to add it to the object or rerun this function with eval.loglik=TRUE.
##
## Warning: The following terms have infinite coefficient estimates:
## nodematch.sex.id
##
## Dissolution Coefficients
## =======================
## Dissolution Model: ~offset(edges)
## Target Statistics: 734.1257
## Crude Coefficient: 6.597317
## Mortality/Exit Rate: 0.0002058088
## Adjusted Coefficient: 6.957065
## define the number of steps
nsteps <- 2500
target.stats <- fit.m$target.stats
### with stergm
simulate.with.stergm <- function(x, nsteps){
## extract infos from netest object
nw <- x$fit$network
fit <- x$fit
formation <- x$formation
coef.form <- x$coef.form
dissolution <- x$coef.diss$dissolution
coef.diss <- x$coef.diss
constraints <- x$constraints
target.stats <- x$target.stats
edapprox <- x$edapprox
nwstats.formula <- "formation"
set.control.stergm <- control.simulate.network()
## simulate the initial network
fit.sim <- simulate(formation, basis = nw, coef = x$coef.form.crude, constraints = constraints)
## run the dynamic simulation
diag.sim <- simulate(fit.sim, formation = formation,
dissolution = dissolution, coef.form = coef.form,
coef.diss = coef.diss$coef.crude, time.slices = nsteps,
constraints = constraints, monitor = nwstats.formula,
nsim = 1, control = set.control.stergm)
## extract the summary statistcics
stats.stergm <- as.matrix(attributes(diag.sim)$stats)[1:nsteps, , drop = FALSE]
## resahpe the table to be in a ggplot friendly shape
stat.df.stergm <-
stats.stergm %>%
as.data.frame() %>%
mutate(at = 1:n()) %>%
gather('stat.name', 'stat.value', - at)
}
stat.df.stergm <- simulate.with.stergm(fit.m, nsteps)
## plot the diagnosis plot
stat.df.stergm %>%
ggplot(aes(x = at, y = stat.value, colour = stat.name)) +
geom_hline(yintercept = target.stats, linetype = 3) +
geom_line(size = 1) +
theme_minimal()
### with tergmLite
simulate.with.tergmLite <- function(x, nsteps){
## initialize a epimodel like dat object
dat <- list()
dat$el <- list()
dat$p <- list()
## extract init network
nw <- x$fit$network
## init edge list
el <- as.edgelist(nw)
attributes(el)$vnames <- NULL
## initialize tergmLite object
p <-
tergmLite::stergm_prep(
nw,
x$formation,
x$coef.diss$dissolution,
x$coef.form,
x$coef.diss$coef.adj,
x$constraints
)
p$model.form$formula <- NULL
p$model.diss$formula <- NULL
## init nw params object
nwparam <- x[-which(names(x) == "fit")]
## init nodes attributes
attr.val <-
nw$val %>%
bind_rows()
## create the epimodel like dat object
dat <- list(
attr = attr.val,
el = list(el.main = el),
p = list(p.main = p),
nwparam = list(nwparam.main = nwparam)
)
## simulate nsteps
dat.tmp <- dat
stat.df.tergmLite <- NULL
for(at in 1:nsteps){
# cat('*')
## update p
dat.tmp <- tergmLite::updateModelTermInputs(dat.tmp, network = 1)
## make a step of simulation and update el
el.tmp <-
tergmLite::simulate_network(
p = dat.tmp$p[[1]],
el = dat.tmp$el[[1]],
coef.form = dat.tmp$nwparam[[1]]$coef.form,
coef.diss = dat.tmp$nwparam[[1]]$coef.diss$coef.adj,
save.changes = TRUE
)
dat.tmp$el[[1]] <- el.tmp
## Compute the summary stats
## get in edge list attributes
iel.attr <-
dat$attr %>%
bind_rows() %>%
slice(el.tmp[, 1])
colnames(iel.attr) <- paste0(colnames(iel.attr), '.iel')
## get out edge list attributes
oel.attr <-
dat$attr %>%
bind_rows() %>%
slice(el.tmp[, 2])
colnames(oel.attr) <- paste0(colnames(oel.attr), '.oel')
## merged iel and oel attributes
attr.tmp <-
bind_cols(
iel.attr,
oel.attr
)
## compute the number of edges statistics
stat.edges <-
data_frame(
at = at,
stat.name = 'edges',
stat.value = nrow(el.tmp)
)
## compute the number of edges per age group statistics
stat.nodefactor.ageg.id <-
data_frame(
ageg.id = c(iel.attr$ageg.id.iel, oel.attr$ageg.id.oel)
) %>%
group_by(
ageg.id
) %>%
summarise(
stat.value = n()
) %>%
filter(
ageg.id != 1
) %>%
mutate(
at = at,
stat.name = paste0('nodefactor.ageg.id.', ageg.id)
) %>%
select(at, stat.name, stat.value)
## compute the sex homophily statistics
stat.nodematch.sex.id <-
attr.tmp %>%
summarise(
stat.value = sum(sex.id.iel == sex.id.oel)
) %>%
mutate(
at = at,
stat.name = 'nodematch.sex.id'
) %>%
select(at, stat.name, stat.value)
## store results
stat.df.tergmLite <-
bind_rows(
stat.df.tergmLite,
stat.edges,
stat.nodefactor.ageg.id,
stat.nodematch.sex.id
)
}
return(stat.df.tergmLite)
}
stat.df.tergmLite <- simulate.with.tergmLite(fit.m, nsteps)
## plot the diagnosis plot
stat.df.tergmLite %>%
ggplot(aes(x = at, y = stat.value, colour = stat.name)) +
geom_hline(yintercept = target.stats, linetype = 3) +
geom_line(size = 1) +
theme_minimal()
# ## attempt to find a correction coeff
#
# # raised.target <-
# # stat.df.tergmLite %>%
# # filter(
# # stat.name == 'edges',
# # at > 5000
# # ) %>%
# # summarise(
# # raised.target = mean(stat.value)
# # ) %>%
# # pull(raised.target)
# #
# # formal.target <- target.stats['edges']
# #
# # corect.coef <- - log(raised.target / formal.target)
#
#
# ## define a function that apply a corrective coefficient to a netest object. This can be
# ## useful when data collection time and simulation time step are different
# offset.correction.netest <-
# function(
# fit.i,
# obs.dur.offset = - log(52)
# ) {
# # browser()
# ## coeff correction
# fit.i$coef.form['edges'] <- fit.i$coef.form['edges'] + obs.dur.offset
# fit.i$coef.form.crude['edges'] <- fit.i$coef.form.crude['edges'] + obs.dur.offset
# fit.i$fit$coef['edges'] <- fit.i$fit$coef['edges'] + obs.dur.offset
# fit.i$fit$MCMCtheta['edges'] <- fit.i$fit$MCMCtheta['edges'] + obs.dur.offset
# ## target corection
# target.stats <- fit.i$target.stats
# target.to.correct <-
# names(target.stats) %>%
# str_subset('^edges|^absdiffby|^nodefactor|^nodematch')
# target.offset <- 1 / exp(- obs.dur.offset)
# fit.i$target.stats[target.to.correct] <- target.stats[target.to.correct] * target.offset
# return(fit.i)
# }
#
#
# corect.coef <- - log(1.5)
# fit.m.corr <- offset.correction.netest(fit.m, corect.coef)
#
# stat.df.tergmLite <- simulate.with.tergmLite(fit.m.corr, nsteps)
#
# ## plot the diagnosis plot
# stat.df.tergmLite %>%
# ggplot(aes(x = at, y = stat.value, colour = stat.name)) +
# geom_hline(yintercept = target.stats, linetype = 3) +
# geom_line(size = 1) +
# theme_minimal()
#