1 Back to Outline

This section includes information on network diagnostics. You can go back to table of contents if you would like.

Other related documents are:

library(here)
if (is.null(params$sim_data)) {
  sim_loc <- "../../EpiModel/AE/sim_epimodel3/sim_on_2021-06-07_at_2033.rds"
}else{
  sim_loc <- params$sim_data
}

make_mean_deg_plot <- function(sim, var_type = "race", 
                               targ_inf = NULL, simnum = 1){
  edge_counts <- sim$temp[[simnum]]$demog_edge_counts
  edge_counts$at[edge_counts$at == 1] <- 
    min(edge_counts$at[edge_counts$at != 1]) - 1
  edge_counts[, "col_var"] <- edge_counts[, var_type]
  f_edg_cnts <- edge_counts %>% filter(ptype == 0) %>%
    mutate(rep = 3) %>% uncount(rep, .id = "ptype")
  edge_counts <- bind_rows(
    edge_counts %>% filter(ptype != 0),
    f_edg_cnts
  )
  
  cmpr_edges <- pivot_wider(
    edge_counts, 
     values_from = "count", names_from = "type",
    id_cols = colnames(edge_counts)
    ) %>% filter(age.grp != 6) %>%
    group_by(ptype, col_var, at) %>%
    summarise(partner = sum(partner, na.rm = TRUE),
              total = sum(total, na.rm = TRUE)) %>%
    mutate(
      "mean_deg" = partner / total, 
      "year"  = round(
        (at - sim$control$start) / 52 + sim$control$year.start, 4
        )
      )
  cmpr_edges$ptype <- factor(cmpr_edges$ptype, levels = 1:3,
                             labels = c("Main",  "Casl", "Inst"))
  if (is.null(targ_inf)) {
    cmpr_edges %>% ungroup() %>% 
     ggplot(aes(x = year, y = mean_deg, 
                color = as.character(col_var))) + 
     geom_point(alpha = 0.5) + geom_smooth() + 
     facet_wrap(~ ptype, scales = "free_y" ) + 
     geom_vline(xintercept = 2019, color = "black") +
     scale_colour_discrete(var_type) + ylab("Mean Degree") +
   xlab("Year") 
  }else{
    newtarg_inf <- targ_inf[[var_type]] %>% filter(
      var %in% c("deg.main", "deg.casl", "count.oo.wk")
    )
    newtarg_inf$ptype <- recode(
      newtarg_inf$var, deg.main = "Main", deg.casl = "Casl",
      count.oo.wk = "Inst")
    newtarg_inf$col_var <- newtarg_inf[, var_type, drop = TRUE]
    
    newtarg_inf$ptype <- factor(newtarg_inf$ptype,
                                levels = c("Main",  "Casl", "Inst"),
                                labels = c("Main",  "Casl", "Inst"))
    
    cmpr_edges %>% ungroup() %>% 
     ggplot(aes(x = year, y = mean_deg, 
                color = as.character(col_var))) + 
     geom_point(alpha = 0.5) + geom_smooth() +
     geom_hline(data = newtarg_inf, aes(yintercept = wtd.mean,
                                      color = as.factor(col_var))) +
     facet_wrap(~ ptype, scales = "free_y" ) + 
     geom_vline(xintercept = 2019, color = "black") +
     scale_colour_discrete(var_type) + ylab("Mean Degree") +
     xlab("Year") 
  }
}

make_mean_agextime <- function(sim, var_type = "race",
                               yrs = NULL, targ_year = 2019,
                               targ_inf = NULL,
                               plot_dif = FALSE){
  if (length(targ_year) != 1) {stop("Must specify one target year")}
  edge_counts <- sim$temp[[1]]$demog_edge_counts
  if (var_type == "none") {
    edge_counts[, "facet_var"] <- "pooled"
  }else{
    edge_counts[, "facet_var"] <- edge_counts[, var_type]
  }
  f_edg_cnts <- edge_counts %>% filter(ptype == 0) %>%
    mutate(rep = 3) %>% uncount(rep, .id = "ptype")
  edge_counts <- bind_rows(
    edge_counts %>% filter(ptype != 0),
    f_edg_cnts
  )
  cmpr_edges <- pivot_wider(
    edge_counts, 
     values_from = "count", names_from = "type",
    id_cols = colnames(edge_counts)
    ) %>% group_by(ptype, age, at, facet_var) %>%
    summarise(partner = sum(partner, na.rm = TRUE),
              total = sum(total, na.rm = TRUE)) %>%
    mutate(
      "mean_deg" = partner / total, 
      "year"  = round(
        (at - sim$control$start) / 52 + sim$control$year.start
        )
      )
  cmpr_edges$ptype <- factor(cmpr_edges$ptype, levels = 1:3,
                             labels = c("Main",  "Casl", "Inst"))
  all_yrs <- sort(unique(cmpr_edges$year), decreasing = FALSE)[-1]
  if (is.null(yrs)) {
    keep_yrs <- c(min(all_yrs), all_yrs[all_yrs %% 10 == 0])
  }else{
    keep_yrs <- yrs
  }
  keep_yrs <- unique(c(yrs, targ_year))
  cmpr_edges <- cmpr_edges %>% filter(year %in% keep_yrs, 
                                      year < 2031)
  cmpr_edges$year <- factor(
    cmpr_edges$year, 
    levels = sort(cmpr_edges$year), 
    labels = sort(cmpr_edges$year)
    )
  if (is.null(targ_inf)) {
   cmpr_edges %>% ungroup() %>% 
     ggplot(aes(x = age, y = mean_deg, col = facet_var)) + 
     # geom_line(alpha = 0.5) +
     geom_smooth(se = FALSE, span = 4) + #, size = 2) + 
     facet_grid(year~ptype) + 
     geom_vline(xintercept = 2019, color = "black") +
     scale_colour_discrete("Partnership Type") +
     ylab("Mean Degree") +
     xlab("Age") + theme(legend.position = "bottom")
  }else{
    long_dat <- targ_inf[[paste0("agex", var_type)]]
    if (var_type == "none") {
      long_dat[, "facet_var"] <- "pooled"
    }else{
      long_dat[, "facet_var"] <- long_dat[, var_type, drop = TRUE]
    }
    long_dat <- long_dat %>% filter(
      var %in% c("deg.main", "deg.casl", "count.oo.wk")
    ) %>% mutate(rep = 10) %>% uncount(weights = rep, .id = "age") %>%
      mutate(age = age + 4 + age.grp * 10)
    long_dat$ptype <- recode(
      long_dat$var, deg.main = "Main", deg.casl = "Casl",
      count.oo.wk = "Inst")
    
    long_dat$ptype <- factor(long_dat$ptype,
                                levels = c("Main",  "Casl", "Inst"),
                                labels = c("Main",  "Casl", "Inst"))
    r_mean_degs <- long_dat %>% mutate(rep = length(keep_yrs)) %>%
      uncount(rep, .id = "year") %>% mutate(year = keep_yrs[year]) %>%
      filter(year < 2031)
    r_mean_degs$year <- factor(
    r_mean_degs$year,
    levels = sort(r_mean_degs$year),
    labels = sort(r_mean_degs$year)
    )
    if (var_type == "none") {alph <- 0.3}else{alph = 0}
    all_md <- bind_rows(
      bind_cols(cmpr_edges %>% ungroup() %>% 
                  select(ptype, age, facet_var,
                         mean_deg, year),
                "type" = "Simulated"),
      bind_cols(r_mean_degs %>% select(ptype, age, facet_var, age.grp,
                                       mean_deg = wtd.mean, year),
                "type" = "Target")
    ) 
    if (plot_dif) {
      wd <- all_md %>% ungroup() %>%
        select(type, facet_var, 
                              mean_deg, ptype, year, age) %>% 
        pivot_wider(names_from = type, 
                    values_from = mean_deg) %>%
        mutate(dif = Simulated - Target )
     wd %>%
     ggplot(aes(x = age, y = dif, 
                col = facet_var)) + 
     geom_hline(yintercept = 0, color = "black") +
     geom_vline(xintercept = 65, color = "gray") +
     
     geom_smooth(se = FALSE) + #, size = 2, span = 2.5) + 
     facet_wrap(~ptype + year, scales = "free_y", dir = "v", 
                ncol = 3) + 
     scale_colour_discrete(var_type) +
     coord_cartesian(ylim = c(-0.6, 0.4)) +
     ylab("Mean Degree (Observed - Target)") +
     xlab("Age") + theme(legend.position = "bottom")
    }else{
      fac_df <- expand.grid(year = unique(all_md$year), 
                            ptype = unique(all_md$ptype))
      fac_df$highl <- as.numeric(fac_df$year == targ_year)
      all_md %>% ungroup() %>% 
        ggplot() +
        # geom_point(alpha = alph) +
        geom_rect(data = fac_df %>% filter(highl == 1),
                  aes(fill = as.factor(highl)),
                  xmin = -Inf, xmax = Inf,
                  ymin = -Inf, ymax = Inf, alpha = 0.3) +
        geom_vline(xintercept = 65, color = "gray") +
        geom_smooth(
          data = all_md %>% filter(type == "Simulated"),
          aes(x = age, y = mean_deg, 
                        color = facet_var,
                   linetype = type), 
                   se = FALSE) + #, size = 2, span = 2.5) + 
        geom_line(
          data = all_md %>% filter(type == "Target"),
          aes(x = age, y = mean_deg, group = paste0(age.grp, facet_var),
                        color = facet_var,
                   linetype = type), size = 1.5, alpha = 0.5) +
        facet_grid(ptype ~ year, scales = "free_y") + 
        scale_colour_discrete(var_type) +
        scale_fill_manual(values = "black") +
        ylab("Mean Degree") + guides(fill = FALSE) +
        xlab("Age") + theme(legend.position = "bottom") + 
        coord_cartesian(ylim = c(0, NA)) + 
        scale_linetype_manual(values = c(1, 2))
      } 
  }
}
attr_names <- EpiModelWHAMPDX::attr_names
attr_names$insurance <- NULL
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(magrittr))
suppressPackageStartupMessages(library(cowplot))
suppressPackageStartupMessages(library(ggrepel))
options(dplyr.summarise.inform = FALSE)
library(EpiModelWHAMPDX)
library(here)
sim_dat <- readRDS(sim_loc)
knitr::opts_chunk$set(echo = TRUE, message = FALSE,
                      warning = FALSE, fig.width = 8)
cur_targs <- readRDS(here("Data", "EpiModelSims", "WHAMP.dx.targs.rds"))

2 Network

Here, targets are taken from the initialized network at the beginning of the EpiModel simulation that matches the network statistics observed in the WHAMP ego data.

2.1 Mean Degree across time

2.1.1 Within race group

temp_targs <- readRDS(here("Data", "Targets", "WhampNetTargets.RDS"))
make_mean_deg_plot(sim_dat, var_type = "race", targ_inf = temp_targs$meandeg)

2.1.2 Within Region

make_mean_deg_plot(sim_dat, var_type = "region", targ_inf = temp_targs$meandeg)

2.2 Mean Degree across Age

2.2.1 Within race group

make_mean_agextime(sim_dat, "race", yrs = c(2005, 2015, 2019, 2025, 2030), 
                   targ_inf = temp_targs$meandeg)

2.2.2 Within Region

make_mean_agextime(sim_dat, "region", yrs = c(2005, 2015, 2019, 2025, 2030), 
                   targ_inf = temp_targs$meandeg)

2.3 Mean relationship duration during simulation

## TODO: Make this work with multiple simulations
rd <- sim_dat$temp[[1]][["rel_durs"]]
rd$type <- "Simulated"
all_ended <- rd %>% filter(ong == "ended", at > 520) %>% group_by(ptype) %>% 
  mutate(rel_dur = cumsum((rel_dur * num_obs)[order(at)])/
            cumsum((num_obs)[order(at)]), 
         ong = "ended_tot")
rd <- rbind(rd, all_ended)
epistats <- readRDS("../../Data/Params/epistats_whamp.rds")

main_mean_relage <-
  as.numeric(1/(1 - (2^(-1/(epistats$relage.main$wtd.median))))) 
casl_mean_relage <-
  as.numeric(1/(1 - (2^(-1/(epistats$relage.casl$wtd.median)))))
rel_ages <- c(main_mean_relage, casl_mean_relage) / 52

targ_ages <- data.frame(ptype = 1:2, ong = NA, 
                        rel_dur = rel_ages[1:2], 
                        type = "Weighted Survey Responses")

semat <- rd %>% filter(at > 700) %>%
  group_by(type, ong, ptype) %>% summarise(se = sd(rel_dur))
rd <- left_join(rd, semat, by = c("type", "ong", "ptype"))
ggplot(rd, aes(color = ong, linetype = type, fill = ong)) +
  geom_line(aes(x = at / 52 + 1943, y = rel_dur), size = 1) + 
  geom_ribbon(aes(x = at / 52 + 1943, ymin = rel_dur - 2 * se, 
                  ymax = rel_dur + 2 * se), alpha = 0.3, color = NA) + 
  geom_hline(data = targ_ages,
             aes(color = ong, linetype = type, yintercept = rel_dur),
              size = 1) +
  facet_wrap(~ptype, labeller = as_labeller(c(`1` = "Main", `2` = "Casual"))) +
  xlab("Year") + ggtitle("Mean Relationship Duration (in Years)") +
  coord_cartesian(ylim = c(1.5, 5.5)) +
  scale_color_discrete(
    name = "Status",
    labels = c("ong" = "Still Ongoing (rel_age)",
               "ended" = "Ended in Last Year (rel_dur)",
               "ended_tot" = "All Completed Relationships")) +
  scale_fill_discrete(
    name = "Status",
    labels = c("ong" = "Still Ongoing (rel_age)",
               "ended" = "Ended in Last Year (rel_dur)",
               "ended_tot" = "All Completed Relationships")) +
  scale_linetype_manual(name = "Source",
                        values = c("solid", "dotted")) +
  cowplot::theme_cowplot() + 
  theme(legend.position = "bottom", axis.title = element_blank(), 
        legend.box = "vertical", legend.margin = margin(), 
        panel.background = element_rect(fill = "#E0E2E7"),
        panel.grid = element_line(color = "#FFFFFF", size = 0.4))

2.4 End of sim Degree Distribution

2.4.1 Overall

deg.targs <- readRDS(here("Data", "Targets", "WhampNetTargets.RDS"))
el_inf <- sim_dat$el[[1]] %>% lapply(
  FUN = function(x) {
    t1 <- table(x[, 1])
    t2 <- table(x[, 2])
    all_edge <- data.frame(id = as.numeric(c(names(t1), names(t2))),
                           count = c(t1, t2)) %>%
      group_by(id) %>% summarise(count = sum(count))
    return(all_edge)
  }
)

sim_dat$attr[[1]]$deg.main[el_inf[[1]]$id] <- el_inf[[1]]$count
sim_dat$attr[[1]]$deg.main[-el_inf[[1]]$id] <- 0
sim_dat$attr[[1]]$deg.casl[el_inf[[2]]$id] <- el_inf[[2]]$count
sim_dat$attr[[1]]$deg.casl[-el_inf[[2]]$id] <- 0
sim_dat$attr[[1]]$deg.inst <- rep(0, length(sim_dat$attr[[1]]$deg.casl))
sim_dat$attr[[1]]$deg.inst[el_inf[[3]]$id] <- el_inf[[3]]$count
sim_dat$attr[[1]]$deg.inst[-el_inf[[3]]$id] <- 0

## TODO: Make this work with multiple simulations
eos_dd_dat_orig <- do.call(
  data.frame, 
  sim_dat$attr[[1]][c("deg.main", "deg.casl", "deg.inst", "race", "region",
                      "snap", "age.grp")])
eos_dd_dat <- eos_dd_dat_orig %>% 
  pivot_longer(cols = c("deg.main", "deg.casl", "deg.inst"),
               names_to = "Partner Type", 
               values_to = "Degree") %>%
  mutate(Degree = pmin(5, Degree))

eos_dd_dat$`Partner Type` = factor(
  eos_dd_dat$`Partner Type`, 
  levels = c("deg.main", "deg.casl", "deg.inst"),
  labels = c("Main", "Casl",  "Inst"))

targ_dist <- bind_rows(
  bind_cols(deg.targs$deg.dists$main,
            `Partner Type` = "Main", Source = "Target") %>%
    mutate(Degree = deg.main) %>% select(-deg.main),
  bind_cols(deg.targs$deg.dists$casl,
            `Partner Type` = "Casl", Source = "Target") %>%
    mutate(Degree = deg.casl) %>% select(-deg.casl),
  bind_cols(deg.targs$deg.dists$inst,
            `Partner Type` = "Inst", Source = "Target") %>%
    mutate(Degree = count.oo.wk) %>% select(-count.oo.wk),
) %>% select(`Partner Type`, Degree, proportion = prop.wtd, Source)

obs_dist <- eos_dd_dat %>% group_by(`Partner Type`, Degree) %>% 
  count() %>% ungroup() %>% group_by(`Partner Type`) %>%
  mutate(proportion = n / sum(n), 
         Source = "Simulated") %>% ungroup()

ggplot(bind_rows(obs_dist, targ_dist),
       aes(x = Degree, y = proportion, fill = Source)) + 
  geom_col(position = "dodge") + facet_wrap(~`Partner Type`) + 
  ylab("Proportion") +
  labs(caption = "Targets come from the observed WHAMP degree distribution")

2.4.2 By Race

make_eos_deg_plot <- function(var, name) {
  eos_dd_dat %>% group_by({{ var }}, Degree, `Partner Type`) %>% 
    summarise(count = n()) %>% ungroup() %>%
    group_by({{ var }}) %>% mutate(Proportion = count / sum(count)) %>%
    ggplot(aes(x = Degree, y = Proportion, color = {{ var }},
               fill = {{ var }})) + 
    geom_col(position = "dodge") + facet_wrap(~`Partner Type`) + 
    theme(legend.position = "bottom") +
    guides(colour = guide_legend(nrow = 1)) + scale_color_discrete(name) +
    scale_fill_discrete(name)
}

eos_dd_dat <- eos_dd_dat %>% mutate(
  snap5 = pmin(5, snap),
  race = recode(race, !!!attr_names$race),
  region = recode(region, !!!attr_names$region),
  snap5 = recode(paste0("sn", snap5), !!!attr_names$snap5),
  age.grp = recode(age.grp, !!!attr_names$age.grp),
)

make_eos_deg_plot(race, "Race")

2.4.3 By Region

make_eos_deg_plot(region, "Region")

2.4.4 By Age Group

eos_dd_dat$age.grp <- as.character(eos_dd_dat$age.grp)
make_eos_deg_plot(age.grp, "Age Group")

2.4.5 By SNAP

make_eos_deg_plot(snap5, "SNAP Group")

2.5 Average SNAP Value

snap.mean <- list(
  mean.snap = list(name = "mean.snap",
                  sec_title = "Mean SNAP Value",
                  plot_name = "Mean SNAP ",
                  plot_cap = "",
                  plot_ylab = "Mean",
                  plt_type = "line",
                  vars = c("mean.snap."),
                  sum_fun = function(x) {x}
  ),
  mean.snap5 = list(name = "mean.snap5",
                  sec_title = "Mean SNAP5 Value",
                  plot_name = "Mean SNAP5 ",
                  plot_cap = "",
                  plot_ylab = "Mean",
                  plt_type = "line",
                  vars = c("mean.snap5."),
                  sum_fun = function(x) {x}
  )
)
print_many_plots(snap.mean, sim_dat, num_hash = 3, targ_df = cur_targs)

2.5.1 Mean SNAP Value

2.5.1.1 ovr

2.5.1.2 race

2.5.1.3 region

2.5.1.4 age.grp

2.5.1.5 snap5

2.5.2 Mean SNAP5 Value

2.5.2.1 ovr

2.5.2.2 race

2.5.2.3 region

2.5.2.4 age.grp

2.5.2.5 snap5

2.6 End of Simulation Mean Age of Ties

2.6.1 Table

last_step <- rd %>% filter(at == max(rd$at)) %>% select(ptype, ong, rel_dur)
last_step$ong <- dplyr::recode(last_step$ong, ong = "Ongoing at Last time Step",
               ended = "Ended in Last Year",
               ended_tot = "All Completed Relationships")
target <- targ_ages %>% mutate(ong = "Target") %>% select(-type)
frst_tble <- bind_rows(target, last_step)
frst_tble$ptype <- c("Main", "Casual")[frst_tble$ptype]
tble <- frst_tble %>% pivot_wider(names_from = ptype, values_from = rel_dur)
targs <- tble %>% filter(row_number() == 1) %>%
  mutate(count = 4) %>% uncount(count)
colnames(targs)[2:3] <- c("mt", "ct")
targs$ong <- tble$ong
fin_tab <- left_join(tble, targs, by = "ong") %>% 
  mutate(mpd = 100 * (Main - mt) / (Main), 
         cpd = 100 * (Casual - ct) / Casual) %>% 
  select(-mt, -ct) %>% relocate(mpd, .after = Main) %>%
  mutate(across(where(is.double), round, 2)) %>%
  mutate(targ = c("Simulated", "Target")[as.numeric(mpd == 0) + 1]) %>%
  group_by(targ)

gt::gt(fin_tab) %>% gt::cols_label(ong = " ", Main = "Duration", 
                                mpd = "Percentage Difference",
                                Casual = "Duration", 
                                cpd = "Percentage Difference") %>%
  gt::tab_header(
    title = "Mean Relationship Durations in Years",
    subtitle = "Target comes from Netest object and other values are based on EpiModel simulation"
  ) %>% gt::cols_align(
    "center"
  ) %>% gt::tab_spanner(
    label = "Main Relationships",
    columns = c("Main", "mpd")
  ) %>% gt::tab_spanner(
    label = "Casual Relationships",
    columns = c("Casual", "cpd")
  ) %>%
  gt::tab_style(
   style = gt::cell_text(weight = "bold"),
   locations = gt::cells_body(rows = ong == "Target")
  ) %>% gt::tab_style(
    style = gt::cell_fill(color = "lightgrey"),
    locations = list(
      gt::cells_body(columns = c("mpd", "cpd"))
      )
  )
Mean Relationship Durations in Years
Target comes from Netest object and other values are based on EpiModel simulation
Main Relationships Casual Relationships
Duration Percentage Difference Duration Percentage Difference
Target
Target 4.82 0.00 2.29 0.00
Simulated
Ended in Last Year 4.71 -2.50 2.21 -3.57
Ongoing at Last time Step 4.41 -9.31 2.19 -4.73
All Completed Relationships 4.53 -6.58 2.19 -4.72

2.6.2 Distribution Plot

For these plots, the histogram provides the observed distribution, and the red and blue lines are the expected probability density of relationship duration under an exponential model with the observed and target mean duration respectively.

## TODO: Make this work with multiple simulations
plist <- sim_dat$temp[[1]][["plist"]] %>% as.data.frame()

plist$ong <- "ended"
plist$ong[is.na(plist$stop)] <- "ong"
plist$stop[is.na(plist$stop)] <- max(plist$stop, na.rm = TRUE) 
plist$length <- round(pmax(0, (plist$stop - (plist$start - 1)) / 52))
plist$flr_length <- floor(pmax(0, (plist$stop - (plist$start - 1)) / 52))
n_plist <- plist %>% filter(ptype != 3, flr_length < 25) %>%
  group_by(ptype, ong, length = flr_length) %>%
  count() %>% ungroup() %>% group_by(ptype, ong) %>%
  mutate(prop = n / sum(n))
durs <- plist %>% filter(ptype != 3) %>%
    group_by(ptype, ong) %>%
    summarise(rel_dur = pmax(0, mean((stop - start) / 52)), 
              type = "Observed") 
targ_durs <- data.frame(ptype = c(1, 1, 2, 2), 
                        ong = c("ended", "ong",
                                "ended", "ong"), 
                        rel_dur = rep(rel_ages, each = 2),
                        type = "Target")
durs <- bind_rows(durs, targ_durs)
  
exp_fits <- durs %>% 
  mutate(num = max(n_plist$length) + 1) %>%
  uncount(weights = num, .id = "length") %>%
  mutate(length = length - 1) %>% 
  mutate(prop = dexp(x = length + 0.5, rate = 1 / rel_dur))

ggplot(n_plist, aes(x = length, y = prop)) + 
  geom_line(data = exp_fits, aes(color = type), alpha = 1) +
  geom_col(alpha = 0.5) + 
  facet_grid(
    ptype ~ ong, 
    labeller = as_labeller(c(`1` = "Main", `2` = "Casual", 
                             `ong` = "Still Ongoing at End", 
                             `ended` = "Ended in Last Year"))) +
    xlab("Relationship Duration") + ylab("Proportion")  + labs(
     caption = "Lines represent exponential fits based on the target mean duration and the \n observed mean duraiton in the EpiModel simulation at the last step"
  )

2.7 Cross Tab of Main and Casual Degree

2.7.1 Simulated

tidy_sim <- eos_dd_dat_orig %>% filter(age.grp < 6) %>%
  mutate(deg.casl = pmin(6, deg.casl),
         deg.main = pmin(6, deg.main)) %>%
  group_by(deg.main, deg.casl) %>% 
  count() %>% ungroup() %>%
  mutate(n = ifelse(is.na(n), 0, n),
         Proportion = round(100 * n / sum(n), 2)) %>% 
  select(-n) 

simulated <- tidy_sim %>% pivot_wider(deg.main, names_from = "deg.casl", 
                             values_from = Proportion) %>%
  mutate(across(2:8, function(x) ifelse(is.na(x), 0, x)))

simulated %>%
  gt::gt() %>%
  gt::tab_spanner(
    label = "Casual Degree",
    columns = vars(`0`, `1`, `2`, `3`, `4`, `5`, `6`)
  ) %>% 
  gt::cols_label(
    deg.main = "Main Degree"
  )
Main Degree Casual Degree
0 1 2 3 4 5 6
0 29.54 24.09 12.28 5.07 1.36 0.33 0.35
1 12.77 7.64 3.35 1.04 0.24 0.06 0.05
2 0.78 0.52 0.20 0.08 0.00 0.00 0.00
3 0.13 0.07 0.03 0.01 0.00 0.00 0.00
4 0.01 0.01 0.01 0.00 0.00 0.00 0.00

2.7.2 Observed (in WHAMP data)

tidy_whamp <- WHAMPData::whampArtnetWA_egodata$main$egos %>% 
  group_by(deg.main, deg.casl) %>% 
  count() %>% ungroup() %>%
  mutate(n = ifelse(is.na(n), 0, n),
         Proportion = round(100 * n / sum(n), 2)) %>% 
  select(-n)

obs_whamp <- tidy_whamp %>% pivot_wider(deg.main, names_from = "deg.casl", 
                             values_from = Proportion) %>%
  mutate(across(2:7, function(x) ifelse(is.na(x), 0, x)))

obs_whamp %>%
  gt::gt() %>%
  gt::tab_spanner(
    label = "Casual Degree",
    columns = vars(`0`, `1`, `2`, `3`, `4`, `5`)
  ) %>% 
  gt::cols_label(
    deg.main = "Main Degree"
  )
Main Degree Casual Degree
0 1 2 3 4 5
0 29.93 24.76 8.53 4.81 2.88 0.84
1 17.67 3.97 3.00 1.80 0.48 0.00
2 0.48 0.36 0.36 0.12 0.00 0.00

2.7.3 Absolute difference (in percent)

Difference in percentage between the WHAMP survey and simulated data.

diffs <- full_join(tidy_sim,
                   tidy_whamp %>% rename("prop.whamp" = Proportion),
          by = c("deg.main", "deg.casl")) %>%
  mutate(deg.main = c("0", "1", "2+")[1 + pmin(2, deg.main)],
         deg.casl = c("0", "1", "2", "3+")[1 + pmin(3, deg.casl)]) %>%
  group_by(deg.main, deg.casl) %>%
  summarise(prop.whamp = sum(prop.whamp, na.rm = TRUE),
            Proportion = sum(Proportion, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(prop.whamp = ifelse(is.na(prop.whamp), 0, prop.whamp)) %>%
  mutate(abs_diff = prop.whamp - Proportion, 
         perc_dif = 100 * round(abs_diff / prop.whamp, 4))

diffs %>% select(deg.main, deg.casl, abs_diff) %>%
  pivot_wider(deg.main, names_from = "deg.casl", 
                             values_from = abs_diff) %>%
  mutate(across(2:4, function(x) ifelse(is.na(x), 0, x))) %>%
  gt::gt() %>%
  gt::tab_spanner(
    label = "Casual Degree",
    columns = vars(`0`, `1`, `2`, `3+`)
  ) %>% 
  gt::cols_label(
    deg.main = "Main Degree"
  )
Main Degree Casual Degree
0 1 2 3+
0 0.39 0.67 -3.75 1.42
1 4.90 -3.67 -0.35 0.89
2+ -0.44 -0.24 0.12 0.03

2.7.4 Percentage difference

Here we calculate the percent difference (of the percentages). The formula for this calculation is

\[ 100 \times \frac{\text{WHAMP Percentage} - \text{Simulated Percentage}}{\text{WHAMP Percentage}} \]

diffs %>% select(deg.main, deg.casl, perc_dif) %>%
  pivot_wider(deg.main, names_from = "deg.casl", 
                             values_from = perc_dif) %>%
  mutate(across(2:4, function(x) ifelse(is.na(x), 0, x))) %>%
  gt::gt() %>%
  gt::tab_spanner(
    label = "Casual Degree",
    columns = vars(`0`, `1`, `2`, `3+`)
  ) %>% 
  gt::cols_label(
    deg.main = "Main Degree"
  )
Main Degree Casual Degree
0 1 2 3+
0 1.30 2.71 -43.96 16.65
1 27.73 -92.44 -11.67 39.04
2+ -91.67 -66.67 33.33 25.00