Preliminaries

Packages

if(params$version == "3"){
  pkglib <- "~/R/GHmaster-library"
} else {
  pkglib <- "~/R/GHdev-library"
}

defaultPaths <- .libPaths()
.libPaths(c(pkglib, defaultPaths))
.libPaths()
## [1] "/homes/morrism/R/GHmaster-library"               
## [2] "/homes/morrism/R/x86_64-pc-linux-gnu-library/4.0"
## [3] "/usr/lib/R/library"                              
## [4] "/net/debian10/lib/R/site-library"
devtools::dev_mode(on = T, path = pkglib)

library(ergm.ego, lib.loc = pkglib)
sessioninfo::package_info(
  pkgs = c("network","statnet.common","ergm","tergm", "ergm.ego"),
  dependencies = FALSE)
##  package        * version     date       lib
##  ergm           * 3.11.0-5793 2020-10-13 [1]
##  ergm.ego       * 0.6.0-509   2020-10-12 [1]
##  network        * 1.17.0-585  2020-10-13 [1]
##  statnet.common   4.4.0-300   2020-10-12 [1]
##  tergm            3.7.0-2009  2020-10-12 [1]
##  source                                 
##  Github (statnet/ergm@a4a99ff)          
##  Github (statnet/ergm.ego@ac9b2cd)      
##  Github (statnet/network@91b3c07)       
##  Github (statnet/statnet.common@f8fb190)
##  Github (statnet/tergm@ea2f11c)         
## 
## [1] /homes/morrism/R/GHmaster-library
## [2] /homes/morrism/R/x86_64-pc-linux-gnu-library/4.0
## [3] /usr/lib/R/library
## [4] /net/debian10/lib/R/site-library
library(dplyr)
library(ggplot2)
library(kableExtra)
library(here)

Data

# load original egodata and fits
load(here("Data", "stergmFits", 
          paste0("fits_mc_", params$modelmc, params$df, params$version, ".rda")))
load(here("Data", "stergmFits", 
          paste0("netest_inst_", params$modelinst, params$df, params$version, ".rda")))

# Obs egodata

obsegos <- fit_main$egodata$egos %>%
  mutate(instperwk = count.oo.part/52)

Simulated degrees

nsims <- 50

casl_sims <- data.frame(
  simulate(fit_casl, nsim = nsims, output = "stats", 
           monitor = ~sociality(nodes = T),
           simulate.control = control.simulate(MCMC.interval = 1e5))
  ) 
casl_simego_mdeg <- casl_sims %>%
  summarize(across(starts_with("sociality"), mean))
  
main_sims <- data.frame(
  simulate(fit_main, nsim = nsims, output = "stats", 
           monitor = ~sociality(nodes = T),
           simulate.control = control.simulate(MCMC.interval = 1e5))
  ) 
main_simego_mdeg <- main_sims %>%
  summarize(across(starts_with("sociality"), mean))

inst_sims <- data.frame(
  simulate(netest_inst$fit, nsim = nsims, output = "stats", 
           monitor = ~sociality(nodes = T))
  ) 
inst_simego_mdeg <- inst_sims %>%
  summarize(across(starts_with("sociality"), mean))

source <- paste("the mean of", nsims, "simulations from the fitted model")

# Simulated Nodeset (they're all the same across the nets)
simegos <- as.egodata(fit_main$newnetwork)$egos

# Simulated Degrees

simegos <- data.frame(simegos, 
                      sim.casl = unname(t(casl_simego_mdeg)), 
                      sim.main = unname(t(main_simego_mdeg)), 
                      sim.instperwk = unname(t(inst_simego_mdeg))
                      )


# Obs mean deg by age (weighted)
## These are used as the points on the graphs
obsmeandegs <- obsegos %>%
  group_by(trunc(age)) %>%
  summarize(main_deg = weighted.mean(deg.main, ego.wawt),
            casl_deg = weighted.mean(deg.casl, ego.wawt),
            inst_wk = weighted.mean(instperwk, ego.wawt),
            wtdObs = sum(ego.wawt)) %>%
  rename(age = `trunc(age)`)

# Sim mean deg by age (self-weighted)
simmeandegs <- simegos %>%
  group_by(trunc(age)) %>%
  summarize(main_deg = mean(sim.main),
            casl_deg = mean(sim.casl),
            inst_wk = mean(sim.instperwk),
            simObs = n()) %>%
  rename(age = `trunc(age)`)

Introduction

Package versions: 3 Model: MC hiv_caxr_, Inst hiv_

This report presents mean degree estimates at the front of the workflow:

  • Observed data: The original survey egodata, restricted to the analytic sample. All estimates are based on the egos dataframe in the egodata object.

  • Predicted: The predicted values come from the mean of 50 simulations from the fitted model. Each network is a stochastic draw from the model distribution, with size equal to the scaled-up pseudo-population (about 15K). There is stochastic variability in these networks that is not represented here, we just plot the mean. The predicted mean degree is first taken across simulations for each node, we then plot the mean degree by age for all persons of that age.

Breakdowns of mean degree by network (main, casl, inst), and by age, race and region are included.

The tables are best used to validate the target values used in simulation diagnostics. They are not set up to show Observed, Predicted and Difference. For comparing observed to predicted, use the graphs.

Model Fits

The main and casl models are the same. Both use the continuous version of age and age^2. Inst has a different set of covariates, and uses the factor version age.cat.

Main

summary(fit_main)
## Call:
## ergm(formula = ergm.formula, offset.coef = ergm.offset.coef, 
##     target.stats = m, eval.loglik = FALSE, control = control$ergm.control)
## 
## Iterations:  3 out of 20 
## 
## Monte Carlo MLE Results:
##                                 Estimate Std. Error MCMC % z value Pr(>|z|)    
## offset(netsize.adj)           -9.6156721  0.0000000      0    -Inf  < 1e-04 ***
## edges                         -1.7457928  1.6612967      0  -1.051 0.293322    
## nodecov.age<16                -0.7116056  1.0955539      0  -0.650 0.515990    
## nodefactor.race.B             -1.7185512  3.3130831      0  -0.519 0.603958    
## nodefactor.race.H             -2.8046110  2.7834130      0  -1.008 0.313639    
## nodefactor.region.EasternWA    1.6155421  0.8623229      0   1.873 0.061003 .  
## nodefactor.region.WesternWA    0.1570598  0.6765327      0   0.232 0.816418    
## absdiff.sqrt.age              -1.0901238  0.1392804      0  -7.827  < 1e-04 ***
## nodematch.race.B               0.8408320  0.6278547      0   1.339 0.180501    
## nodematch.race.H               0.0349698  0.8175205      0   0.043 0.965881    
## nodematch.race.O               0.6468605  0.7239327      0   0.894 0.371570    
## nodematch.region.EasternWA     1.7682454  1.1126373      0   1.589 0.112007    
## nodematch.region.King          3.5037830  0.7945541      0   4.410  < 1e-04 ***
## nodematch.region.WesternWA     3.4959519  0.7966925      0   4.388  < 1e-04 ***
## nodecov.age.B                  0.0419741  0.1911893      0   0.220 0.826228    
## nodecov.age.H                  0.1166554  0.1334976      0   0.874 0.382206    
## nodecov.age.O                 -0.0310316  0.0383845      0  -0.808 0.418836    
## nodecov.age2.B                -0.0003917  0.0029095      0  -0.135 0.892894    
## nodecov.age2.H                -0.0012739  0.0017937      0  -0.710 0.477557    
## nodecov.age2.O                 0.0001968  0.0004841      0   0.407 0.684326    
## concurrent                    -1.1082134  0.3627463      0  -3.055 0.002250 ** 
## nodefactor.deg.casl>0.TRUE    -0.5956534  0.1961995      0  -3.036 0.002398 ** 
## nodefactor.status.1            0.7117195  0.2094604      0   3.398 0.000679 ***
## nodematch.status               0.7268721  0.2606028      0   2.789 0.005284 ** 
## offset(nodematch.role.type.0)       -Inf  0.0000000      0    -Inf  < 1e-04 ***
## offset(nodematch.role.type.1)       -Inf  0.0000000      0    -Inf  < 1e-04 ***
## offset(nodecov.age>66)              -Inf  0.0000000      0    -Inf  < 1e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
##  The following terms are fixed by offset and are not estimated:
##   offset(netsize.adj) offset(nodematch.role.type.0) offset(nodematch.role.type.1) offset(nodecov.age>66)

Casl

summary(fit_casl)
## Call:
## ergm(formula = ergm.formula, offset.coef = ergm.offset.coef, 
##     target.stats = m, eval.loglik = FALSE, control = control$ergm.control)
## 
## Iterations:  4 out of 20 
## 
## Monte Carlo MLE Results:
##                                 Estimate Std. Error MCMC % z value Pr(>|z|)    
## offset(netsize.adj)           -9.6156721  0.0000000      0    -Inf  < 1e-04 ***
## edges                         -3.6826239  1.2039616      0  -3.059  0.00222 ** 
## nodecov.age<16                -0.5099104  1.0292215      0  -0.495  0.62029    
## nodefactor.race.B              4.0570091  1.8927877      0   2.143  0.03208 *  
## nodefactor.race.H              1.8975483  1.2423802      0   1.527  0.12667    
## nodefactor.region.EasternWA   -1.7539792  0.7004810      0  -2.504  0.01228 *  
## nodefactor.region.WesternWA   -2.2432938  0.7360261      0  -3.048  0.00230 ** 
## absdiff.sqrt.age              -0.5354458  0.0701823      0  -7.629  < 1e-04 ***
## nodematch.race.B              -0.2620150  0.7131808      0  -0.367  0.71333    
## nodematch.race.H              -0.1997810  0.4382287      0  -0.456  0.64847    
## nodematch.race.O               0.6859431  0.2960800      0   2.317  0.02052 *  
## nodematch.region.EasternWA     4.8870649  0.7495194      0   6.520  < 1e-04 ***
## nodematch.region.King         -0.0610390  0.7778496      0  -0.078  0.93745    
## nodematch.region.WesternWA     4.9457398  0.7318802      0   6.758  < 1e-04 ***
## nodecov.age.B                 -0.1470067  0.1192562      0  -1.233  0.21769    
## nodecov.age.H                  0.0386351  0.0698663      0   0.553  0.58027    
## nodecov.age.O                  0.0710717  0.0229612      0   3.095  0.00197 ** 
## nodecov.age2.B                 0.0024880  0.0017485      0   1.423  0.15476    
## nodecov.age2.H                -0.0007346  0.0010515      0  -0.699  0.48480    
## nodecov.age2.O                -0.0007501  0.0002743      0  -2.735  0.00625 ** 
## concurrent                     0.0997768  0.1570478      0   0.635  0.52521    
## nodefactor.deg.main>0.TRUE    -0.3013060  0.1487422      0  -2.026  0.04280 *  
## nodefactor.status.1            0.7417955  0.1094832      0   6.775  < 1e-04 ***
## nodematch.status               1.0363959  0.1329844      0   7.793  < 1e-04 ***
## offset(nodematch.role.type.0)       -Inf  0.0000000      0    -Inf  < 1e-04 ***
## offset(nodematch.role.type.1)       -Inf  0.0000000      0    -Inf  < 1e-04 ***
## offset(nodecov.age>66)              -Inf  0.0000000      0    -Inf  < 1e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
##  The following terms are fixed by offset and are not estimated:
##   offset(netsize.adj) offset(nodematch.role.type.0) offset(nodematch.role.type.1) offset(nodecov.age>66)

Inst

Inst model was fit with target stats, not ergm.ego.

summary(netest_inst$fit)
## Call:
## ergm(formula = formation.nw, constraints = constraints, offset.coef = coef.form, 
##     target.stats = target.stats, eval.loglik = FALSE, control = set.control.ergm, 
##     verbose = verbose)
## 
## Iterations:  2 out of 20 
## 
## Monte Carlo MLE Results:
##                                Estimate Std. Error MCMC % z value Pr(>|z|)    
## edges                         -17.96907    0.42708      0 -42.074  < 1e-04 ***
## nodefactor.race.B               0.52155    0.26245      0   1.987 0.046895 *  
## nodefactor.race.H               1.21270    0.28320      0   4.282  < 1e-04 ***
## nodematch.race.B                1.08205    0.45562      0   2.375 0.017553 *  
## nodematch.race.H               -0.37952    0.37853      0  -1.003 0.316056    
## nodematch.race.O                0.95035    0.30047      0   3.163 0.001562 ** 
## nodefactor.age.grp.2           -0.53248    0.09846      0  -5.408  < 1e-04 ***
## nodefactor.age.grp.3           -0.23983    0.09925      0  -2.416 0.015675 *  
## nodefactor.age.grp.4           -0.28402    0.09896      0  -2.870 0.004106 ** 
## nodefactor.age.grp.5           -0.34947    0.10148      0  -3.444 0.000574 ***
## nodefactor.age.grp.6               -Inf    0.00000      0    -Inf  < 1e-04 ***
## absdiff.sqrt.age               -0.56735    0.05260      0 -10.787  < 1e-04 ***
## nodefactor.risk.grp.4           2.41463    0.15085      0  16.007  < 1e-04 ***
## nodefactor.risk.grp.5           4.25872    0.13302      0  32.015  < 1e-04 ***
## nodefactor.status.1             1.00768    0.05805      0  17.360  < 1e-04 ***
## nodematch.status                1.46524    0.11365      0  12.892  < 1e-04 ***
## nodefactor.deg.casl>0.TRUE     -0.39738    0.07014      0  -5.665  < 1e-04 ***
## nodefactor.deg.main>0.TRUE     -0.68118    0.06731      0 -10.120  < 1e-04 ***
## offset(nodematch.role.type.0)      -Inf    0.00000      0    -Inf  < 1e-04 ***
## offset(nodematch.role.type.1)      -Inf    0.00000      0    -Inf  < 1e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
##  Warning: The following terms have infinite coefficient estimates:
##   nodefactor.age.grp.6 
## 
##  The following terms are fixed by offset and are not estimated:
##   offset(nodematch.role.type.0) offset(nodematch.role.type.1)

Reference Tables

With theage.grp breakdowns, deviations of observed from predicted values may occur for the Main and Casl models, because those models use continuous age, not the categorical age.grp variable.

Observed

tabtot <- obsegos %>%
  summarize(main_deg = weighted.mean(deg.main, ego.wawt),
            casl_deg = weighted.mean(deg.casl, ego.wawt),
            inst_wk = weighted.mean(instperwk, ego.wawt),
            num = n(),
            wtdNum = round(sum(ego.wawt),0),
            wtdPct = 100.0) %>%
  mutate(Breakdown_by = "Total") %>%
  select(Breakdown_by, main_deg, casl_deg, inst_wk, num, wtdNum, wtdPct)

tabage <- obsegos %>%
  group_by(age.grp) %>%
  summarize(main_deg = weighted.mean(deg.main, ego.wawt),
            casl_deg = weighted.mean(deg.casl, ego.wawt),
            inst_wk = weighted.mean(instperwk, ego.wawt),
            num = n(),
            wtdNum = round(sum(ego.wawt),0),
            wtdPct = round(100*wtdNum/tabtot$wtdNum,1)) %>%
  mutate(Breakdown_by = factor(age.grp)) %>%
  select(Breakdown_by, main_deg, casl_deg, inst_wk, num, wtdNum, wtdPct)

tabrace <- obsegos %>%
  group_by(race) %>%
  summarize(main_deg = weighted.mean(deg.main, ego.wawt),
            casl_deg = weighted.mean(deg.casl, ego.wawt),
            inst_wk = weighted.mean(instperwk, ego.wawt),
            num = n(),
            wtdNum = round(sum(ego.wawt),0),
            wtdPct = round(100*wtdNum/tabtot$wtdNum,1))%>%
  mutate(Breakdown_by = race)%>%
  select(Breakdown_by, main_deg, casl_deg, inst_wk, num, wtdNum, wtdPct)

tabregion <- obsegos %>%
  group_by(region) %>%
  summarize(main_deg = weighted.mean(deg.main, ego.wawt),
            casl_deg = weighted.mean(deg.casl, ego.wawt),
            inst_wk = weighted.mean(instperwk, ego.wawt),
            num = n(),
            wtdNum = round(sum(ego.wawt),0),
            wtdPct = round(100*wtdNum/tabtot$wtdNum,1))%>%
  mutate(Breakdown_by = region)%>%
  select(Breakdown_by, main_deg, casl_deg, inst_wk, num, wtdNum, wtdPct)

tab <- bind_rows(tabage, tabrace, tabregion, tabtot)

kable(tab, digits = 2,
      caption = "Observed Mean Degree") %>%
  kable_styling(bootstrap_options = c("striped", "hover")) %>%
  pack_rows(index = c("Age Group" = 5, 
                      "Race" = 3, 
                      "Region" = 3)) %>%
  row_spec(12, bold = TRUE, font_size = 16, background = "grey")
Observed Mean Degree
Breakdown_by main_deg casl_deg inst_wk num wtdNum wtdPct
Age Group
1 0.31 0.42 0.05 126 144 17.3
2 0.41 0.69 0.11 252 212 25.5
3 0.31 1.13 0.12 168 177 21.3
4 0.19 1.05 0.10 149 153 18.4
5 0.18 1.35 0.10 137 147 17.7
Race
B 0.22 1.11 0.11 30 56 6.7
H 0.28 0.95 0.08 94 93 11.2
O 0.30 0.90 0.10 708 683 82.1
Region
EasternWA 0.34 0.75 0.07 105 83 10.0
King 0.31 0.91 0.10 409 472 56.7
WesternWA 0.25 0.99 0.10 318 277 33.3
Total 0.29 0.92 0.10 832 832 100.0

Predicted

tabtot <- simegos %>%
  summarize(main_deg = mean(sim.main),
            casl_deg = mean(sim.casl),
            inst_wk = mean(sim.instperwk),
            num = n(),
            pct = 100.0) %>%
  mutate(Breakdown_by = "Total") %>%
  select(Breakdown_by, main_deg, casl_deg, inst_wk, num, pct)

tabage <- simegos %>%
  group_by(age.grp) %>%
  summarize(main_deg = mean(sim.main),
            casl_deg = mean(sim.casl),
            inst_wk = mean(sim.instperwk),
            num = n(),
            pct = round(100*num/tabtot$num,1)) %>%
  mutate(Breakdown_by = factor(age.grp)) %>%
  select(Breakdown_by, main_deg, casl_deg, inst_wk, num, pct)

tabrace <- simegos %>%
  group_by(race) %>%
  summarize(main_deg = mean(sim.main),
            casl_deg = mean(sim.casl),
            inst_wk = mean(sim.instperwk),
            num = n(),
            pct = round(100*num/tabtot$num,1))%>%
  mutate(Breakdown_by = race)%>%
  select(Breakdown_by, main_deg, casl_deg, inst_wk, num, pct)

tabregion <- simegos %>%
  group_by(region) %>%
  summarize(main_deg = mean(sim.main),
            casl_deg = mean(sim.casl),
            inst_wk = mean(sim.instperwk),
            num = n(),
            pct = round(100*num/tabtot$num,1))%>%
  mutate(Breakdown_by = region)%>%
  select(Breakdown_by, main_deg, casl_deg, inst_wk, num, pct)

tab <- bind_rows(tabage, tabrace, tabregion, tabtot)

kable(tab, digits = 2,
      caption = "Predicted Mean Degree (based on one draw from the model)") %>%
  kable_styling(bootstrap_options = c("striped", "hover")) %>%
  pack_rows(index = c("Age Group" = 5, 
                      "Race" = 3, 
                      "Region" = 3)) %>%
  row_spec(12, bold = TRUE, font_size = 16, background = "grey")
Predicted Mean Degree (based on one draw from the model)
Breakdown_by main_deg casl_deg inst_wk num pct
Age Group
1 0.33 0.56 0.06 2592 17.3
2 0.37 0.84 0.09 3756 25.0
3 0.30 1.10 0.12 3202 21.3
4 0.26 1.20 0.13 2796 18.6
5 0.17 0.84 0.11 2652 17.7
Race
B 0.26 1.21 0.08 1010 6.7
H 0.30 1.04 0.11 1661 11.1
O 0.29 0.87 0.10 12327 82.2
Region
EasternWA 0.34 0.79 0.09 1473 9.8
King 0.31 0.90 0.11 8526 56.8
WesternWA 0.26 0.98 0.10 4999 33.3
Total 0.29 0.91 0.10 14998 100.0

Graphs

These plots are a better choice for comparing the ergm fits to the observed data. Keep in mind the fit is a single draw from the model. The observed data are represented by both points and a smoother. The predicted is just a smoother.

Each network by Age

The points on these plots are degree means for each age. For the predicted values, each node has a predicted mean degree across 50 simulations. Scatterplots of observed vs. predicted means are in the section on lack of fit.

Main

colors <- c("pred" = "slateblue", 
            "obs" = "darkorange3")
            
ggplot(simegos, aes(x = age, y = sim.main, color = "pred")) +
  geom_smooth() +
  geom_smooth(data=obsegos, 
             aes(x=age, y = deg.main, color = "obs",
                 weight=ego.wawt)) +
  geom_point(data=simmeandegs, 
             aes(x=age, y = main_deg, #size=simObs, 
                 color = "pred"), 
             alpha=0.3) +
  geom_point(data=obsmeandegs, 
             aes(x=age, y = main_deg, size=wtdObs, color = "obs"), 
             alpha=0.3) +
  labs(title = "Main Mean Degree by Age",
       subtitle = "Observed Data and Predicted Values",
       caption = paste("Model:", params$modelmc, 
                       ";  pkgVersions:",  params$version),
       x = "Age",
       y = "Mean Degree",
       color = "Data source") +
  scale_color_manual(values = colors)

Casl

ggplot(simegos, aes(x = age, y = sim.casl, color = "pred")) +
  geom_smooth() +
  geom_smooth(data=obsegos, 
              aes(x=age, y = deg.casl, 
                  color = "obs",
                  weight = ego.wawt)) +
  geom_point(data=simmeandegs, 
             aes(x=age, y = casl_deg, #size=simObs, 
                 color = "pred"), 
             alpha=0.3) +
  geom_point(data=obsmeandegs, 
             aes(x=age, y = casl_deg, size=wtdObs, color = "obs"), 
             alpha=0.3) +
  labs(title = "Casl Mean Degree by Age",
       subtitle = "Observed Data and Predicted Values",
       caption = paste("Model:", params$modelmc, ";  pkgVersions:",  params$version),
       x = "Age",
       y = "Mean Degree",
       color = "Data source") +
  scale_color_manual(values = colors)

Inst

ggplot(simegos, aes(x = age, y = sim.instperwk, color = "pred")) +
  geom_smooth() +
  geom_smooth(data=obsegos, 
             aes(x=age, y = instperwk, color = "obs",
                 weight=ego.wawt)) +
  geom_point(data=simmeandegs, 
             aes(x=age, y = inst_wk, #size=simObs, 
                 color = "pred"), 
             alpha=0.3) +
  geom_point(data=obsmeandegs, 
             aes(x=age, y = inst_wk, size=wtdObs, color = "obs"), 
             alpha=0.3) +
  labs(title = "Inst Weekly Count by Age",
       subtitle = "Observed Data and Predicted Values",
       caption = paste("Model:", params$modelinst, ";  pkgVersions:",  params$version),
       x = "Age",
       y = "Mean Degree",
       color = "Data source") +
  scale_color_manual(values = colors)

Breakdowns by Node Attributes

Race

obs_mdeg_agexrace <- obsegos %>%
  group_by(trunc(age), race) %>%
  summarize(main_deg = mean(deg.main),
            casl_deg = mean(deg.casl),
            inst_wk = mean(instperwk),
            wtdObs = sum(ego.wawt)) %>%
  rename(age = `trunc(age)`)

sim_mdeg_agexrace <- simegos %>%
  group_by(trunc(age), race) %>%
  summarize(main_deg = mean(sim.main),
            casl_deg = mean(sim.casl),
            inst_wk = mean(sim.instperwk),
            simObs = n()) %>%
  rename(age = `trunc(age)`)

Main

ggplot(simegos, aes(x = age, y = sim.main, color = "pred")) +
  geom_smooth(method = "loess", span = 1) +
  geom_smooth(data=obsegos, 
              aes(x=age, y = deg.main, color = "obs",
                  weight=ego.wawt),
              span=2) +
  geom_point(data=obs_mdeg_agexrace, 
             aes(x=age, y = main_deg, size=wtdObs, color = "obs"), 
             alpha=0.3) +
  facet_grid(cols = vars(race)) +
  labs(title = "Main Mean Degree by Age and Race",
       subtitle = "Observed Data and Predicted Values",
       caption = paste("Model:", params$modelmc, ";  pkgVersions:",  params$version),
       x = "Age",
       y = "Mean Degree",
       color = "Data source") +
  scale_color_manual(values = colors)

Casl

ggplot(simegos, aes(x = age, y = sim.casl, color = "pred")) +
  geom_smooth(method = "loess", span = 1) +
  geom_smooth(data=obsegos, 
              aes(x=age, y = deg.casl, color = "obs",
                  weight=ego.wawt)) +
  geom_point(data=obs_mdeg_agexrace, 
             aes(x=age, y = casl_deg, size=wtdObs, color = "obs"), 
             alpha=0.3) +
  facet_grid(cols = vars(race)) +
  labs(title = "Casl Mean Degree by Age and Race",
       subtitle = "Observed Data and Predicted Values",
       caption = paste("Model:", params$modelmc, ";  pkgVersions:",  params$version),
       x = "Age",
       y = "Mean Degree",
       color = "Data source") +
  scale_color_manual(values = colors)

Inst

ggplot(simegos, aes(x = age, y = sim.instperwk, color = "pred")) +
  geom_smooth(method="loess", span=1) +
  geom_smooth(data=obsegos,
              aes(x=age, y = instperwk, color = "obs",
                  weight=ego.wawt)) +
  geom_point(data=obs_mdeg_agexrace,
             aes(x=age, y = inst_wk, size=wtdObs, color = "obs"),
             alpha=0.3) +
  facet_grid(cols = vars(race)) +
  labs(title = "Inst Mean Degree by Age and Race",
       subtitle = "Observed Data and Predicted Values",
       caption = paste("Model:", params$modelinst, ";  pkgVersions:",  params$version),
       x = "Age",
       y = "Mean Degree",
       color = "Data source") +
  scale_color_manual(values = colors)

Region

omdageregion <- obsegos %>%
  group_by(trunc(age), region) %>%
  summarize(main_deg = mean(deg.main),
            casl_deg = mean(deg.casl),
            inst_wk = mean(instperwk),
            wtdObs = sum(ego.wawt)) %>%
  rename(age = `trunc(age)`)

Main

ggplot(simegos, aes(x = age, y = sim.main, color = "pred")) +
  geom_smooth(method = "loess", span = 1) +
  geom_smooth(data=obsegos, 
              aes(x=age, y = deg.main, color = "obs",
                  weight=ego.wawt)) +
  geom_point(data=omdageregion, 
             aes(x=age, y = main_deg, size=wtdObs, color = "obs"), 
             alpha=0.3) +
  facet_grid(cols = vars(region)) +
  labs(title = "Main Mean Degree by Age and Region",
       subtitle = "Observed Data and Predicted Values",
       caption = paste("Model:", params$modelmc, ";  pkgVersions:",  params$version),
       x = "Age",
       y = "Mean Degree",
       color = "Data source") +
  scale_color_manual(values = colors)

Casl

ggplot(simegos, aes(x = age, y = sim.casl, color = "pred")) +
  geom_smooth(method = "loess", span = 1) +
  geom_smooth(data=obsegos,
              aes(x=age, y = deg.casl, color = "obs",
                  weight=ego.wawt)) +
  geom_point(data=omdageregion,
             aes(x=age, y = casl_deg, size=wtdObs, color = "obs"),
             alpha=0.3) +
  facet_grid(cols = vars(region)) +
  labs(title = "Casl Mean Degree by Age and Region",
       subtitle = "Observed Data and Predicted Values",
       caption = paste("Model:", params$modelmc, ";  pkgVersions:",  params$version),
       x = "Age",
       y = "Mean Degree",
       color = "Data source") +
  scale_color_manual(values = colors)

Inst

ggplot(simegos, aes(x = age, y = sim.instperwk, color = "pred")) +
  geom_smooth(method = "loess", span = 1) +
  geom_smooth(data=obsegos,
              aes(x=age, y = instperwk, color = "obs",
                  weight=ego.wawt)) +
  geom_point(data=omdageregion,
             aes(x=age, y = inst_wk, size=wtdObs, color = "obs"),
             alpha=0.3) +
  facet_grid(cols = vars(region)) +
  labs(title = "Inst Mean Degree by Age and Region",
       subtitle = "Observed Data and Predicted Values",
       caption = paste("Model:", params$modelinst, ";  pkgVersions:",  params$version),
       x = "Age",
       y = "Mean Degree",
       color = "Data source") +
  scale_color_manual(values = colors)

Direct comparisons

Age means

Here we plot counts and mean degrees for each integer value of age, comparing simulated vs. observed. ideally all points will lie along the red line showing equality.

  • The scaleup factor for persons of each age is working as expected.

  • The correlation of sim and observed mean degrees varies by network.

wide <- full_join(obsmeandegs, simmeandegs, by = "age")

par(mfrow=c(2,2))

plot(wide$wtdObs, wide$simObs,
     main = "Age Composition",
     xlab = "Observed",
     ylab = "Simulated")
abline(0, nrow(simegos)/nrow(obsegos), col = "red")

with(wide, symbols(x=main_deg.x, y=main_deg.y, 
                   circles=wtdObs, inches=1/10,
                   add=F, bg="steelblue2", fg="black",
                   xlim = c(0, 0.5), ylim = c(0, 0.5),
                   main = "Main Mean Deg",
                   xlab = "Observed",
                   ylab = "Simulated"))
abline(0,1, col="red")

with(wide, symbols(x=casl_deg.x, y=casl_deg.y, 
                   circles=wtdObs, inches=1/10,
                   add=F, bg="steelblue2", fg="black",
                   xlim = c(0, 2), ylim = c(0, 2),
                   main = "Casl Mean Deg",
                   xlab = "Observed",
                   ylab = "Simulated"))
abline(0,1, col="red")

with(wide, symbols(x=inst_wk.x, y=inst_wk.y, 
                   circles=wtdObs, inches=1/10,
                   add=F, bg="steelblue2", fg="black",
                   xlim = c(0, 0.3), ylim = c(0, 0.3),
                   main = "Inst per wk (mean)",
                   xlab = "Observed",
                   ylab = "Simulated"))
abline(0,1, col="red")

Edge counts

Sim is boxplot; Sim mean is blue diamond; Obs target is red line.

par(mfrow = c(1,3))

boxplot(main_sims$edges, main = "Main")
abline(h = fit_main$target.stats["edges"], col="red")
points("1", mean(main_sims$edges), pch = 5, col = "blue")

boxplot(casl_sims$edges, main = "Casl")
abline(h = fit_casl$target.stats["edges"], col="red")
points("1", mean(casl_sims$edges), pch = 5, col = "blue")

boxplot(inst_sims$edges, main = "Inst")
abline(h = netest_inst$fit$target.stats["edges"], col="red")
points("1", mean(inst_sims$edges), pch = 5, col = "blue")

Obs Deg v Pred Deg

Using the scaled up nodeset, boxplot the predicted meandeg ~ obsdeg. Note for the Inst network, metric is partners per month (easier to understand)

par(mfrow = c(1,3))

boxplot(simegos$sim.main ~ factor(simegos$deg.main),
        main = "Main", xlab = "Observed", ylab = "Predicted",
        varwidth = T)

boxplot(simegos$sim.casl ~ factor(simegos$deg.casl),
        main = "Casl", xlab = "Observed", ylab = "Predicted",
        varwidth = T)

instobs <- cut(simegos$count.oo.part/12,
           breaks=c(-Inf, 0, 1, 2, 3, 4, Inf),
           labels = as.character(c(seq(0,4,1), "5+")))

boxplot(simegos$sim.instperwk*4 ~ instobs, varwidth = T,
        main = "Inst",
        sub = "Note: scale is per Month",
        xlab = "Observed",
        ylab = "Predicted")