This section includes information on network diagnostics. You can go back to table of contents if you would like.
Other related documents are:
library(here)
if (is.null(params$sim_data)) {
sim_loc <- "../../EpiModel/AE/sim_epimodel3/sim_on_2021-06-07_at_2033.rds"
}else{
sim_loc <- params$sim_data
}
make_mean_deg_plot <- function(sim, var_type = "race",
targ_inf = NULL, simnum = 1){
edge_counts <- sim$temp[[simnum]]$demog_edge_counts
edge_counts$at[edge_counts$at == 1] <-
min(edge_counts$at[edge_counts$at != 1]) - 1
edge_counts[, "col_var"] <- edge_counts[, var_type]
f_edg_cnts <- edge_counts %>% filter(ptype == 0) %>%
mutate(rep = 3) %>% uncount(rep, .id = "ptype")
edge_counts <- bind_rows(
edge_counts %>% filter(ptype != 0),
f_edg_cnts
)
cmpr_edges <- pivot_wider(
edge_counts,
values_from = "count", names_from = "type",
id_cols = colnames(edge_counts)
) %>% filter(age.grp != 6) %>%
group_by(ptype, col_var, at) %>%
summarise(partner = sum(partner, na.rm = TRUE),
total = sum(total, na.rm = TRUE)) %>%
mutate(
"mean_deg" = partner / total,
"year" = round(
(at - sim$control$start) / 52 + sim$control$year.start, 4
)
)
cmpr_edges$ptype <- factor(cmpr_edges$ptype, levels = 1:3,
labels = c("Main", "Casl", "Inst"))
if (is.null(targ_inf)) {
cmpr_edges %>% ungroup() %>%
ggplot(aes(x = year, y = mean_deg,
color = as.character(col_var))) +
geom_point(alpha = 0.5) + geom_smooth() +
facet_wrap(~ ptype, scales = "free_y" ) +
geom_vline(xintercept = 2019, color = "black") +
scale_colour_discrete(var_type) + ylab("Mean Degree") +
xlab("Year")
}else{
newtarg_inf <- targ_inf[[var_type]] %>% filter(
var %in% c("deg.main", "deg.casl", "count.oo.wk")
)
newtarg_inf$ptype <- recode(
newtarg_inf$var, deg.main = "Main", deg.casl = "Casl",
count.oo.wk = "Inst")
newtarg_inf$col_var <- newtarg_inf[, var_type, drop = TRUE]
newtarg_inf$ptype <- factor(newtarg_inf$ptype,
levels = c("Main", "Casl", "Inst"),
labels = c("Main", "Casl", "Inst"))
cmpr_edges %>% ungroup() %>%
ggplot(aes(x = year, y = mean_deg,
color = as.character(col_var))) +
geom_point(alpha = 0.5) + geom_smooth() +
geom_hline(data = newtarg_inf, aes(yintercept = wtd.mean,
color = as.factor(col_var))) +
facet_wrap(~ ptype, scales = "free_y" ) +
geom_vline(xintercept = 2019, color = "black") +
scale_colour_discrete(var_type) + ylab("Mean Degree") +
xlab("Year")
}
}
make_mean_agextime <- function(sim, var_type = "race",
yrs = NULL, targ_year = 2019,
targ_inf = NULL,
plot_dif = FALSE){
if (length(targ_year) != 1) {stop("Must specify one target year")}
edge_counts <- sim$temp[[1]]$demog_edge_counts
if (var_type == "none") {
edge_counts[, "facet_var"] <- "pooled"
}else{
edge_counts[, "facet_var"] <- edge_counts[, var_type]
}
f_edg_cnts <- edge_counts %>% filter(ptype == 0) %>%
mutate(rep = 3) %>% uncount(rep, .id = "ptype")
edge_counts <- bind_rows(
edge_counts %>% filter(ptype != 0),
f_edg_cnts
)
cmpr_edges <- pivot_wider(
edge_counts,
values_from = "count", names_from = "type",
id_cols = colnames(edge_counts)
) %>% group_by(ptype, age, at, facet_var) %>%
summarise(partner = sum(partner, na.rm = TRUE),
total = sum(total, na.rm = TRUE)) %>%
mutate(
"mean_deg" = partner / total,
"year" = round(
(at - sim$control$start) / 52 + sim$control$year.start
)
)
cmpr_edges$ptype <- factor(cmpr_edges$ptype, levels = 1:3,
labels = c("Main", "Casl", "Inst"))
all_yrs <- sort(unique(cmpr_edges$year), decreasing = FALSE)[-1]
if (is.null(yrs)) {
keep_yrs <- c(min(all_yrs), all_yrs[all_yrs %% 10 == 0])
}else{
keep_yrs <- yrs
}
keep_yrs <- unique(c(yrs, targ_year))
cmpr_edges <- cmpr_edges %>% filter(year %in% keep_yrs,
year < 2031)
cmpr_edges$year <- factor(
cmpr_edges$year,
levels = sort(cmpr_edges$year),
labels = sort(cmpr_edges$year)
)
if (is.null(targ_inf)) {
cmpr_edges %>% ungroup() %>%
ggplot(aes(x = age, y = mean_deg, col = facet_var)) +
# geom_line(alpha = 0.5) +
geom_smooth(se = FALSE, span = 4) + #, size = 2) +
facet_grid(year~ptype) +
geom_vline(xintercept = 2019, color = "black") +
scale_colour_discrete("Partnership Type") +
ylab("Mean Degree") +
xlab("Age") + theme(legend.position = "bottom")
}else{
long_dat <- targ_inf[[paste0("agex", var_type)]]
if (var_type == "none") {
long_dat[, "facet_var"] <- "pooled"
}else{
long_dat[, "facet_var"] <- long_dat[, var_type, drop = TRUE]
}
long_dat <- long_dat %>% filter(
var %in% c("deg.main", "deg.casl", "count.oo.wk")
) %>% mutate(rep = 10) %>% uncount(weights = rep, .id = "age") %>%
mutate(age = age + 4 + age.grp * 10)
long_dat$ptype <- recode(
long_dat$var, deg.main = "Main", deg.casl = "Casl",
count.oo.wk = "Inst")
long_dat$ptype <- factor(long_dat$ptype,
levels = c("Main", "Casl", "Inst"),
labels = c("Main", "Casl", "Inst"))
r_mean_degs <- long_dat %>% mutate(rep = length(keep_yrs)) %>%
uncount(rep, .id = "year") %>% mutate(year = keep_yrs[year]) %>%
filter(year < 2031)
r_mean_degs$year <- factor(
r_mean_degs$year,
levels = sort(r_mean_degs$year),
labels = sort(r_mean_degs$year)
)
if (var_type == "none") {alph <- 0.3}else{alph = 0}
all_md <- bind_rows(
bind_cols(cmpr_edges %>% ungroup() %>%
select(ptype, age, facet_var,
mean_deg, year),
"type" = "Simulated"),
bind_cols(r_mean_degs %>% select(ptype, age, facet_var, age.grp,
mean_deg = wtd.mean, year),
"type" = "Target")
)
if (plot_dif) {
wd <- all_md %>% ungroup() %>%
select(type, facet_var,
mean_deg, ptype, year, age) %>%
pivot_wider(names_from = type,
values_from = mean_deg) %>%
mutate(dif = Simulated - Target )
wd %>%
ggplot(aes(x = age, y = dif,
col = facet_var)) +
geom_hline(yintercept = 0, color = "black") +
geom_vline(xintercept = 65, color = "gray") +
geom_smooth(se = FALSE) + #, size = 2, span = 2.5) +
facet_wrap(~ptype + year, scales = "free_y", dir = "v",
ncol = 3) +
scale_colour_discrete(var_type) +
coord_cartesian(ylim = c(-0.6, 0.4)) +
ylab("Mean Degree (Observed - Target)") +
xlab("Age") + theme(legend.position = "bottom")
}else{
fac_df <- expand.grid(year = unique(all_md$year),
ptype = unique(all_md$ptype))
fac_df$highl <- as.numeric(fac_df$year == targ_year)
all_md %>% ungroup() %>%
ggplot() +
# geom_point(alpha = alph) +
geom_rect(data = fac_df %>% filter(highl == 1),
aes(fill = as.factor(highl)),
xmin = -Inf, xmax = Inf,
ymin = -Inf, ymax = Inf, alpha = 0.3) +
geom_vline(xintercept = 65, color = "gray") +
geom_smooth(
data = all_md %>% filter(type == "Simulated"),
aes(x = age, y = mean_deg,
color = facet_var,
linetype = type),
se = FALSE) + #, size = 2, span = 2.5) +
geom_line(
data = all_md %>% filter(type == "Target"),
aes(x = age, y = mean_deg, group = paste0(age.grp, facet_var),
color = facet_var,
linetype = type), size = 1.5, alpha = 0.5) +
facet_grid(ptype ~ year, scales = "free_y") +
scale_colour_discrete(var_type) +
scale_fill_manual(values = "black") +
ylab("Mean Degree") + guides(fill = FALSE) +
xlab("Age") + theme(legend.position = "bottom") +
coord_cartesian(ylim = c(0, NA)) +
scale_linetype_manual(values = c(1, 2))
}
}
}
attr_names <- EpiModelWHAMPDX::attr_names
attr_names$insurance <- NULL
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(magrittr))
suppressPackageStartupMessages(library(cowplot))
suppressPackageStartupMessages(library(ggrepel))
options(dplyr.summarise.inform = FALSE)
library(EpiModelWHAMPDX)
library(here)
sim_dat <- readRDS(sim_loc)
knitr::opts_chunk$set(echo = TRUE, message = FALSE,
warning = FALSE, fig.width = 8)
cur_targs <- readRDS(here("Data", "EpiModelSims", "WHAMP.dx.targs.rds"))
Here, targets are taken from the initialized network at the beginning of the EpiModel simulation that matches the network statistics observed in the WHAMP ego data.
temp_targs <- readRDS(here("Data", "Targets", "WhampNetTargets.RDS"))
make_mean_deg_plot(sim_dat, var_type = "race", targ_inf = temp_targs$meandeg)
make_mean_deg_plot(sim_dat, var_type = "region", targ_inf = temp_targs$meandeg)
make_mean_agextime(sim_dat, "race", yrs = c(2005, 2015, 2019, 2025, 2030),
targ_inf = temp_targs$meandeg)
make_mean_agextime(sim_dat, "region", yrs = c(2005, 2015, 2019, 2025, 2030),
targ_inf = temp_targs$meandeg)
## TODO: Make this work with multiple simulations
rd <- sim_dat$temp[[1]][["rel_durs"]]
rd$type <- "Simulated"
all_ended <- rd %>% filter(ong == "ended", at > 520) %>% group_by(ptype) %>%
mutate(rel_dur = cumsum((rel_dur * num_obs)[order(at)])/
cumsum((num_obs)[order(at)]),
ong = "ended_tot")
rd <- rbind(rd, all_ended)
epistats <- readRDS("../../Data/Params/epistats_whamp.rds")
main_mean_relage <-
as.numeric(1/(1 - (2^(-1/(epistats$relage.main$wtd.median)))))
casl_mean_relage <-
as.numeric(1/(1 - (2^(-1/(epistats$relage.casl$wtd.median)))))
rel_ages <- c(main_mean_relage, casl_mean_relage) / 52
targ_ages <- data.frame(ptype = 1:2, ong = NA,
rel_dur = rel_ages[1:2],
type = "Weighted Survey Responses")
semat <- rd %>% filter(at > 700) %>%
group_by(type, ong, ptype) %>% summarise(se = sd(rel_dur))
rd <- left_join(rd, semat, by = c("type", "ong", "ptype"))
ggplot(rd, aes(color = ong, linetype = type, fill = ong)) +
geom_line(aes(x = at / 52 + 1943, y = rel_dur), size = 1) +
geom_ribbon(aes(x = at / 52 + 1943, ymin = rel_dur - 2 * se,
ymax = rel_dur + 2 * se), alpha = 0.3, color = NA) +
geom_hline(data = targ_ages,
aes(color = ong, linetype = type, yintercept = rel_dur),
size = 1) +
facet_wrap(~ptype, labeller = as_labeller(c(`1` = "Main", `2` = "Casual"))) +
xlab("Year") + ggtitle("Mean Relationship Duration (in Years)") +
coord_cartesian(ylim = c(1.5, 5.5)) +
scale_color_discrete(
name = "Status",
labels = c("ong" = "Still Ongoing (rel_age)",
"ended" = "Ended in Last Year (rel_dur)",
"ended_tot" = "All Completed Relationships")) +
scale_fill_discrete(
name = "Status",
labels = c("ong" = "Still Ongoing (rel_age)",
"ended" = "Ended in Last Year (rel_dur)",
"ended_tot" = "All Completed Relationships")) +
scale_linetype_manual(name = "Source",
values = c("solid", "dotted")) +
cowplot::theme_cowplot() +
theme(legend.position = "bottom", axis.title = element_blank(),
legend.box = "vertical", legend.margin = margin(),
panel.background = element_rect(fill = "#E0E2E7"),
panel.grid = element_line(color = "#FFFFFF", size = 0.4))
deg.targs <- readRDS(here("Data", "Targets", "WhampNetTargets.RDS"))
el_inf <- sim_dat$el[[1]] %>% lapply(
FUN = function(x) {
t1 <- table(x[, 1])
t2 <- table(x[, 2])
all_edge <- data.frame(id = as.numeric(c(names(t1), names(t2))),
count = c(t1, t2)) %>%
group_by(id) %>% summarise(count = sum(count))
return(all_edge)
}
)
sim_dat$attr[[1]]$deg.main[el_inf[[1]]$id] <- el_inf[[1]]$count
sim_dat$attr[[1]]$deg.main[-el_inf[[1]]$id] <- 0
sim_dat$attr[[1]]$deg.casl[el_inf[[2]]$id] <- el_inf[[2]]$count
sim_dat$attr[[1]]$deg.casl[-el_inf[[2]]$id] <- 0
sim_dat$attr[[1]]$deg.inst <- rep(0, length(sim_dat$attr[[1]]$deg.casl))
sim_dat$attr[[1]]$deg.inst[el_inf[[3]]$id] <- el_inf[[3]]$count
sim_dat$attr[[1]]$deg.inst[-el_inf[[3]]$id] <- 0
## TODO: Make this work with multiple simulations
eos_dd_dat_orig <- do.call(
data.frame,
sim_dat$attr[[1]][c("deg.main", "deg.casl", "deg.inst", "race", "region",
"snap", "age.grp")])
eos_dd_dat <- eos_dd_dat_orig %>%
pivot_longer(cols = c("deg.main", "deg.casl", "deg.inst"),
names_to = "Partner Type",
values_to = "Degree") %>%
mutate(Degree = pmin(5, Degree))
eos_dd_dat$`Partner Type` = factor(
eos_dd_dat$`Partner Type`,
levels = c("deg.main", "deg.casl", "deg.inst"),
labels = c("Main", "Casl", "Inst"))
targ_dist <- bind_rows(
bind_cols(deg.targs$deg.dists$main,
`Partner Type` = "Main", Source = "Target") %>%
mutate(Degree = deg.main) %>% select(-deg.main),
bind_cols(deg.targs$deg.dists$casl,
`Partner Type` = "Casl", Source = "Target") %>%
mutate(Degree = deg.casl) %>% select(-deg.casl),
bind_cols(deg.targs$deg.dists$inst,
`Partner Type` = "Inst", Source = "Target") %>%
mutate(Degree = count.oo.wk) %>% select(-count.oo.wk),
) %>% select(`Partner Type`, Degree, proportion = prop.wtd, Source)
obs_dist <- eos_dd_dat %>% group_by(`Partner Type`, Degree) %>%
count() %>% ungroup() %>% group_by(`Partner Type`) %>%
mutate(proportion = n / sum(n),
Source = "Simulated") %>% ungroup()
ggplot(bind_rows(obs_dist, targ_dist),
aes(x = Degree, y = proportion, fill = Source)) +
geom_col(position = "dodge") + facet_wrap(~`Partner Type`) +
ylab("Proportion") +
labs(caption = "Targets come from the observed WHAMP degree distribution")
make_eos_deg_plot <- function(var, name) {
eos_dd_dat %>% group_by({{ var }}, Degree, `Partner Type`) %>%
summarise(count = n()) %>% ungroup() %>%
group_by({{ var }}) %>% mutate(Proportion = count / sum(count)) %>%
ggplot(aes(x = Degree, y = Proportion, color = {{ var }},
fill = {{ var }})) +
geom_col(position = "dodge") + facet_wrap(~`Partner Type`) +
theme(legend.position = "bottom") +
guides(colour = guide_legend(nrow = 1)) + scale_color_discrete(name) +
scale_fill_discrete(name)
}
eos_dd_dat <- eos_dd_dat %>% mutate(
snap5 = pmin(5, snap),
race = recode(race, !!!attr_names$race),
region = recode(region, !!!attr_names$region),
snap5 = recode(paste0("sn", snap5), !!!attr_names$snap5),
age.grp = recode(age.grp, !!!attr_names$age.grp),
)
make_eos_deg_plot(race, "Race")
make_eos_deg_plot(region, "Region")
eos_dd_dat$age.grp <- as.character(eos_dd_dat$age.grp)
make_eos_deg_plot(age.grp, "Age Group")
make_eos_deg_plot(snap5, "SNAP Group")
snap.mean <- list(
mean.snap = list(name = "mean.snap",
sec_title = "Mean SNAP Value",
plot_name = "Mean SNAP ",
plot_cap = "",
plot_ylab = "Mean",
plt_type = "line",
vars = c("mean.snap."),
sum_fun = function(x) {x}
),
mean.snap5 = list(name = "mean.snap5",
sec_title = "Mean SNAP5 Value",
plot_name = "Mean SNAP5 ",
plot_cap = "",
plot_ylab = "Mean",
plt_type = "line",
vars = c("mean.snap5."),
sum_fun = function(x) {x}
)
)
print_many_plots(snap.mean, sim_dat, num_hash = 3, targ_df = cur_targs)
last_step <- rd %>% filter(at == max(rd$at)) %>% select(ptype, ong, rel_dur)
last_step$ong <- dplyr::recode(last_step$ong, ong = "Ongoing at Last time Step",
ended = "Ended in Last Year",
ended_tot = "All Completed Relationships")
target <- targ_ages %>% mutate(ong = "Target") %>% select(-type)
frst_tble <- bind_rows(target, last_step)
frst_tble$ptype <- c("Main", "Casual")[frst_tble$ptype]
tble <- frst_tble %>% pivot_wider(names_from = ptype, values_from = rel_dur)
targs <- tble %>% filter(row_number() == 1) %>%
mutate(count = 4) %>% uncount(count)
colnames(targs)[2:3] <- c("mt", "ct")
targs$ong <- tble$ong
fin_tab <- left_join(tble, targs, by = "ong") %>%
mutate(mpd = 100 * (Main - mt) / (Main),
cpd = 100 * (Casual - ct) / Casual) %>%
select(-mt, -ct) %>% relocate(mpd, .after = Main) %>%
mutate(across(where(is.double), round, 2)) %>%
mutate(targ = c("Simulated", "Target")[as.numeric(mpd == 0) + 1]) %>%
group_by(targ)
gt::gt(fin_tab) %>% gt::cols_label(ong = " ", Main = "Duration",
mpd = "Percentage Difference",
Casual = "Duration",
cpd = "Percentage Difference") %>%
gt::tab_header(
title = "Mean Relationship Durations in Years",
subtitle = "Target comes from Netest object and other values are based on EpiModel simulation"
) %>% gt::cols_align(
"center"
) %>% gt::tab_spanner(
label = "Main Relationships",
columns = c("Main", "mpd")
) %>% gt::tab_spanner(
label = "Casual Relationships",
columns = c("Casual", "cpd")
) %>%
gt::tab_style(
style = gt::cell_text(weight = "bold"),
locations = gt::cells_body(rows = ong == "Target")
) %>% gt::tab_style(
style = gt::cell_fill(color = "lightgrey"),
locations = list(
gt::cells_body(columns = c("mpd", "cpd"))
)
)
| Mean Relationship Durations in Years | ||||
|---|---|---|---|---|
| Target comes from Netest object and other values are based on EpiModel simulation | ||||
| Main Relationships | Casual Relationships | |||
| Duration | Percentage Difference | Duration | Percentage Difference | |
| Target | ||||
| Target | 4.82 | 0.00 | 2.29 | 0.00 |
| Simulated | ||||
| Ended in Last Year | 4.71 | -2.50 | 2.21 | -3.57 |
| Ongoing at Last time Step | 4.41 | -9.31 | 2.19 | -4.73 |
| All Completed Relationships | 4.53 | -6.58 | 2.19 | -4.72 |
For these plots, the histogram provides the observed distribution, and the red and blue lines are the expected probability density of relationship duration under an exponential model with the observed and target mean duration respectively.
## TODO: Make this work with multiple simulations
plist <- sim_dat$temp[[1]][["plist"]] %>% as.data.frame()
plist$ong <- "ended"
plist$ong[is.na(plist$stop)] <- "ong"
plist$stop[is.na(plist$stop)] <- max(plist$stop, na.rm = TRUE)
plist$length <- round(pmax(0, (plist$stop - (plist$start - 1)) / 52))
plist$flr_length <- floor(pmax(0, (plist$stop - (plist$start - 1)) / 52))
n_plist <- plist %>% filter(ptype != 3, flr_length < 25) %>%
group_by(ptype, ong, length = flr_length) %>%
count() %>% ungroup() %>% group_by(ptype, ong) %>%
mutate(prop = n / sum(n))
durs <- plist %>% filter(ptype != 3) %>%
group_by(ptype, ong) %>%
summarise(rel_dur = pmax(0, mean((stop - start) / 52)),
type = "Observed")
targ_durs <- data.frame(ptype = c(1, 1, 2, 2),
ong = c("ended", "ong",
"ended", "ong"),
rel_dur = rep(rel_ages, each = 2),
type = "Target")
durs <- bind_rows(durs, targ_durs)
exp_fits <- durs %>%
mutate(num = max(n_plist$length) + 1) %>%
uncount(weights = num, .id = "length") %>%
mutate(length = length - 1) %>%
mutate(prop = dexp(x = length + 0.5, rate = 1 / rel_dur))
ggplot(n_plist, aes(x = length, y = prop)) +
geom_line(data = exp_fits, aes(color = type), alpha = 1) +
geom_col(alpha = 0.5) +
facet_grid(
ptype ~ ong,
labeller = as_labeller(c(`1` = "Main", `2` = "Casual",
`ong` = "Still Ongoing at End",
`ended` = "Ended in Last Year"))) +
xlab("Relationship Duration") + ylab("Proportion") + labs(
caption = "Lines represent exponential fits based on the target mean duration and the \n observed mean duraiton in the EpiModel simulation at the last step"
)
tidy_sim <- eos_dd_dat_orig %>% filter(age.grp < 6) %>%
mutate(deg.casl = pmin(6, deg.casl),
deg.main = pmin(6, deg.main)) %>%
group_by(deg.main, deg.casl) %>%
count() %>% ungroup() %>%
mutate(n = ifelse(is.na(n), 0, n),
Proportion = round(100 * n / sum(n), 2)) %>%
select(-n)
simulated <- tidy_sim %>% pivot_wider(deg.main, names_from = "deg.casl",
values_from = Proportion) %>%
mutate(across(2:8, function(x) ifelse(is.na(x), 0, x)))
simulated %>%
gt::gt() %>%
gt::tab_spanner(
label = "Casual Degree",
columns = vars(`0`, `1`, `2`, `3`, `4`, `5`, `6`)
) %>%
gt::cols_label(
deg.main = "Main Degree"
)
| Main Degree | Casual Degree | ||||||
|---|---|---|---|---|---|---|---|
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | |
| 0 | 29.54 | 24.09 | 12.28 | 5.07 | 1.36 | 0.33 | 0.35 |
| 1 | 12.77 | 7.64 | 3.35 | 1.04 | 0.24 | 0.06 | 0.05 |
| 2 | 0.78 | 0.52 | 0.20 | 0.08 | 0.00 | 0.00 | 0.00 |
| 3 | 0.13 | 0.07 | 0.03 | 0.01 | 0.00 | 0.00 | 0.00 |
| 4 | 0.01 | 0.01 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 |
tidy_whamp <- WHAMPData::whampArtnetWA_egodata$main$egos %>%
group_by(deg.main, deg.casl) %>%
count() %>% ungroup() %>%
mutate(n = ifelse(is.na(n), 0, n),
Proportion = round(100 * n / sum(n), 2)) %>%
select(-n)
obs_whamp <- tidy_whamp %>% pivot_wider(deg.main, names_from = "deg.casl",
values_from = Proportion) %>%
mutate(across(2:7, function(x) ifelse(is.na(x), 0, x)))
obs_whamp %>%
gt::gt() %>%
gt::tab_spanner(
label = "Casual Degree",
columns = vars(`0`, `1`, `2`, `3`, `4`, `5`)
) %>%
gt::cols_label(
deg.main = "Main Degree"
)
| Main Degree | Casual Degree | |||||
|---|---|---|---|---|---|---|
| 0 | 1 | 2 | 3 | 4 | 5 | |
| 0 | 29.93 | 24.76 | 8.53 | 4.81 | 2.88 | 0.84 |
| 1 | 17.67 | 3.97 | 3.00 | 1.80 | 0.48 | 0.00 |
| 2 | 0.48 | 0.36 | 0.36 | 0.12 | 0.00 | 0.00 |
Difference in percentage between the WHAMP survey and simulated data.
diffs <- full_join(tidy_sim,
tidy_whamp %>% rename("prop.whamp" = Proportion),
by = c("deg.main", "deg.casl")) %>%
mutate(deg.main = c("0", "1", "2+")[1 + pmin(2, deg.main)],
deg.casl = c("0", "1", "2", "3+")[1 + pmin(3, deg.casl)]) %>%
group_by(deg.main, deg.casl) %>%
summarise(prop.whamp = sum(prop.whamp, na.rm = TRUE),
Proportion = sum(Proportion, na.rm = TRUE)) %>%
ungroup() %>%
mutate(prop.whamp = ifelse(is.na(prop.whamp), 0, prop.whamp)) %>%
mutate(abs_diff = prop.whamp - Proportion,
perc_dif = 100 * round(abs_diff / prop.whamp, 4))
diffs %>% select(deg.main, deg.casl, abs_diff) %>%
pivot_wider(deg.main, names_from = "deg.casl",
values_from = abs_diff) %>%
mutate(across(2:4, function(x) ifelse(is.na(x), 0, x))) %>%
gt::gt() %>%
gt::tab_spanner(
label = "Casual Degree",
columns = vars(`0`, `1`, `2`, `3+`)
) %>%
gt::cols_label(
deg.main = "Main Degree"
)
| Main Degree | Casual Degree | |||
|---|---|---|---|---|
| 0 | 1 | 2 | 3+ | |
| 0 | 0.39 | 0.67 | -3.75 | 1.42 |
| 1 | 4.90 | -3.67 | -0.35 | 0.89 |
| 2+ | -0.44 | -0.24 | 0.12 | 0.03 |
Here we calculate the percent difference (of the percentages). The formula for this calculation is
\[ 100 \times \frac{\text{WHAMP Percentage} - \text{Simulated Percentage}}{\text{WHAMP Percentage}} \]
diffs %>% select(deg.main, deg.casl, perc_dif) %>%
pivot_wider(deg.main, names_from = "deg.casl",
values_from = perc_dif) %>%
mutate(across(2:4, function(x) ifelse(is.na(x), 0, x))) %>%
gt::gt() %>%
gt::tab_spanner(
label = "Casual Degree",
columns = vars(`0`, `1`, `2`, `3+`)
) %>%
gt::cols_label(
deg.main = "Main Degree"
)
| Main Degree | Casual Degree | |||
|---|---|---|---|---|
| 0 | 1 | 2 | 3+ | |
| 0 | 1.30 | 2.71 | -43.96 | 16.65 |
| 1 | 27.73 | -92.44 | -11.67 | 39.04 |
| 2+ | -91.67 | -66.67 | 33.33 | 25.00 |