We confirm below various attributes of the agents that are logged.
We will do this in data.table for speed.
# data.table supplies fread() and the DT[i, j, by] syntax used throughout.
# The workspace is deliberately NOT cleared with rm(list = ls()): that call
# deletes the caller's objects when this script is sourced, yet leaves
# attached packages and options untouched, so it does not yield a clean
# session. Restart R (or knit in a fresh session) for reproducibility instead.
library(data.table)
Read the agent data into a data.table:
# Load the agent log (presumably one row per agent per logged tick -- confirm).
agent_dt <- fread(
  "/gpfs/data/akhann16/akhann16/cadre_simulated_data/2022-12-09/agent_log.csv"
)

# Largest tick value in the log; most summaries below are restricted to it.
last_tick <- agent_dt[, max(tick)]

# Agents per race at the last tick, with a whole-number percentage column.
agent_dt[tick == last_tick, .N, by = .(race)][
  , "%" := round(N / sum(N) * 100)
][order(race)]

# Agents per value of `female` at the last tick (counts only).
agent_dt[tick == last_tick, .N, by = .(female)][order(female)]
# Disabled chain steps kept from the original:
#[,"%":=N/sum(N)]
#[order(race)]
# Six band edges define five age bands; labels are matched by position.
agebreaks <- c(18, 25, 35, 45, 55, 65)
agelabels <- c("18-24", "25-34", "35-44", "45-54", "55-64")

# Row count at the last tick, before the age-band column is added.
nrow(agent_dt[tick == last_tick])
## [1] 10000

# Add the age band to every row by reference.
# With right = FALSE the bands are left-closed, e.g. [18, 25).
# include.lowest = TRUE additionally closes the final band on the right, so an
# age of exactly 65 is labelled "55-64"; ages outside [18, 65] become NA.
setDT(agent_dt)
agent_dt[
  ,
  age_groups := cut(
    age,
    breaks = agebreaks,
    labels = agelabels,
    right = FALSE,
    include.lowest = TRUE
  )
]

# Adding the column must not change the row count at the last tick.
agent_dt[tick == last_tick, .N]
## [1] 10000

# Counts by age band x race x sex, listed age band first.
agent_dt[tick == last_tick, .N, by = .(age_groups, race, female)][
  order(age_groups, race, female)
]

# Same cells listed race first (sorted on race and age band only).
agent_dt[tick == last_tick, .N, by = .(race, age_groups, female)][
  order(race, age_groups)
]

# Race x sex counts with an unrounded percentage of all agents.
agent_dt[tick == last_tick, .N, by = .(race, female)][
  , "%" := N / sum(N) * 100
][order(race)]

# Age-band counts with a whole-number percentage.
agent_dt[tick == last_tick, .N, by = .(age_groups)][
  , "%" := round(N / sum(N) * 100)
][order(age_groups)]
Median age at the start:
# Median of `age` over the rows logged at tick 1.
median(agent_dt[tick == 1, age])
## [1] 41
Median age at the end:
# Number of agent rows at the last tick.
agent_dt[tick == last_tick, .N]
## [1] 10000

# Median of `age` over those rows.
median(agent_dt[tick == last_tick, age])
## [1] 41

# Smoking status among rows with race == "Black" and female == 0 at the last
# tick. `prop` holds a whole-number percentage, not a 0-1 proportion.
ans <- agent_dt[
  tick == last_tick & race == "Black" & female == 0,
  .N,
  by = .(smoking_status)
][, "prop" := round(N / sum(N) * 100, 0)][]
ans

# Alcohol-use status across all agents at the last tick, with whole-number
# percentages, sorted by status.
agent_dt[tick == last_tick, .N, by = .(alc_use_status)][
  , "%" := round(N / sum(N) * 100, 0)
][order(alc_use_status)][]
We check below the ages of new agents, and if their initial age assignments and age increments are working correctly.
How many agents are present at the last tick:
# Count the rows logged at the last tick.
nrow(agent_dt[tick == last_tick])
## [1] 10000
How many agents present at tick 991 entered after model initialization?
# Rows at tick 991 whose id exceeds 9999; ids above 9999 are taken here to
# mark agents that entered after initialization.
agent_dt[tick == 991][id > 9999, .N]
## [1] 616
The mean current age of agents, grouped by their tick of entry, at tick 1:
# Mean current age and number of agents at tick 1, per entry tick.
agent_dt[
  tick == 1,
  .(mean_age_today = mean(age), N = .N),
  by = entry_at_tick
]
At tick 491:
# Mean current age and number of agents at tick 491, per entry tick.
agent_dt[
  tick == 491,
  .(mean_age_today = mean(age), N = .N),
  by = entry_at_tick
]
And at tick 991:
# Mean current age and number of agents at tick 991, per entry tick.
agent_dt[
  tick == 991,
  .(mean_age_today = mean(age), N = .N),
  by = entry_at_tick
]
These data seem to match expectations for the aging of newly entering agents. Note that at tick 1, only one agent had entered after model initialization, and this agent was 18 years old.
At tick 491, this agent was 19 years old (491/365 ≈ 1.3 years older than 18, rounded), and at tick 991, this agent was 21 years old (991/365 ≈ 2.7 years older than 18, rounded).
So, the aging process seems to work as expected.
Number and proportion of incarcerated persons at last tick:
# Agents per current incarceration status at the last tick; `prop` is a
# whole-number percentage of all agents at that tick.
agent_dt[
  tick == last_tick,
  .N,
  by = .(current_incarceration_status)
][, "prop" := round(N / sum(N) * 100, 0)][]
Break down number and proportions of incarcerated persons at last tick by race, sex and age:
# All four tables use rows at the last tick with current_incarceration_status
# equal to 1. `prop` is each cell's whole-number percentage of that subset.

# By race.
agent_dt[
  tick == last_tick & current_incarceration_status == 1,
  .N,
  by = .(race)
][, "prop" := round(N / sum(N) * 100, 0)][]

# By sex.
agent_dt[
  tick == last_tick & current_incarceration_status == 1,
  .N,
  by = .(female)
][, "prop" := round(N / sum(N) * 100, 0)][]

# By age band.
agent_dt[
  tick == last_tick & current_incarceration_status == 1,
  .N,
  by = .(age_groups)
][, "prop" := round(N / sum(N) * 100, 0)][]

# By race x sex x age band, sorted before the percentage is attached.
agent_dt[
  tick == last_tick & current_incarceration_status == 1,
  .N,
  by = .(race, female, age_groups)
][order(race, female, age_groups)][
  , "prop" := round(N / sum(N) * 100, 0)
][]
Break down number and proportions of persons ever incarcerated, as of the last tick, by race, sex and age:
# Agents with at least one incarceration (n_incarcerations > 0), as observed
# at the last tick, broken down by race, sex and age band.
#
# Rows are restricted with tick == last_tick, like the other last-tick
# summaries in this file. The previous filter, tick == last_incarceration_tick,
# instead kept rows logged at the tick of an agent's most recent incarceration:
# that can match several rows for one agent, or none at all if that tick was
# not logged, so it did not describe the population at the last tick.
# `prop` is each cell's whole-number percentage of the ever-incarcerated total.
ever_inc_dt <- agent_dt[
  tick == last_tick & n_incarcerations > 0,
  .N,
  by = c("race", "female", "age_groups")
][order(race, female, age_groups)][
  , "prop" := round(N / sum(N) * 100, 0)
][]
ever_inc_dt

# Column totals, selected by name rather than by position. The `prop` total
# can differ from 100 because every cell is rounded before summing.
ever_inc_dt[, colSums(.SD), .SDcols = c("N", "prop")]
# Recorded output from BEFORE the filter fix above (re-run to refresh):
## N prop
## 129 104
Mean number of incarcerations at last tick:
# One-row summary of n_incarcerations over all agents at the last tick, plus
# how many agents have a positive count and the total number of agents.
agent_dt[
  tick == last_tick,
  {
    n_inc <- n_incarcerations
    list(
      mean_n_incarcerations = mean(n_inc),
      min_n_incarcerations = min(n_inc),
      max_n_incarcerations = max(n_inc),
      median_n_incarcerations = median(n_inc),
      n_ever_incarcerated_agents = sum(n_inc > 0),
      N = .N
    )
  }
]
Compute statistics on agents whose last release date is on or after their last incarceration date.
# Difference last_release_tick - last_incarceration_tick (in ticks) for agents
# at the last tick whose last release is not earlier than their last
# incarceration (>=).
# NOTE(review): if never-incarcerated agents carry the same placeholder in
# both columns they pass this filter with a difference of 0 -- confirm.
agent_dt[
  tick == last_tick & last_release_tick >= last_incarceration_tick,
  {
    duration <- last_release_tick - last_incarceration_tick
    list(
      mean_duration = mean(duration),
      max_duration = max(duration),
      min_duration = min(duration),
      median_duration = median(duration),
      N = .N
    )
  }
]

# One-row summary of n_releases over all agents at the last tick, plus how
# many agents have a positive count and the total number of agents.
agent_dt[
  tick == last_tick,
  {
    n_rel <- n_releases
    list(
      mean_n_releases = mean(n_rel),
      min_n_releases = min(n_rel),
      max_n_releases = max(n_rel),
      median_n_releases = median(n_rel),
      n_ever_released_agents = sum(n_rel > 0),
      N = .N
    )
  }
]
Compute statistics on agents whose last release date is *before* their last incarceration date.
# Absolute gap (in ticks) between last release and last incarceration, for
# agents at the last tick whose last release precedes their last
# incarceration (<).
agent_dt[
  tick == last_tick & last_release_tick < last_incarceration_tick,
  {
    gap <- abs(last_release_tick - last_incarceration_tick)
    list(
      mean_rel_dur = mean(gap),
      max_rel_dur = max(gap),
      min_rel_dur = min(gap),
      median_rel_dur = median(gap),
      N = .N
    )
  }
]

# One-row summary of n_smkg_stat_trans over all agents at the last tick.
agent_dt[
  tick == last_tick,
  {
    n_trans <- n_smkg_stat_trans
    list(
      mean_n_smkg_trans = mean(n_trans),
      max_n_smkg_trans = max(n_trans),
      min_n_smkg_trans = min(n_trans),
      median_n_smkg_trans = median(n_trans),
      N = .N
    )
  }
]
Smoking State Distributions:
# Agents per race x smoking status at the last tick, sorted by race.
ans <- agent_dt[
  tick == last_tick,
  .N,
  by = .(race, smoking_status)
][order(race)]
ans

# Within-race share of each smoking status (0-1 scale); `ans` is modified by
# reference and then printed.
ans[, "prop" := N / sum(N), by = .(race)][]

# Per-race total over the three listed statuses.
ans_num_smokers <- ans[
  smoking_status %in% c("Former", "Current", "Never"),
  .(num_smokers = sum(N)),
  by = .(race)
]
ans_num_smokers

# Grand total over races.
ans_num_smokers[, sum(num_smokers)]
## [1] 10000
# One-row summary of n_alc_use_stat_trans over all agents at the last tick.
agent_dt[
  tick == last_tick,
  {
    n_trans <- n_alc_use_stat_trans
    list(
      mean_n_alc_trans = mean(n_trans),
      max_n_alc_trans = max(n_trans),
      min_n_alc_trans = min(n_trans),
      median_n_alc_trans = median(n_trans),
      N = .N
    )
  }
]
Alcohol Use State Distributions:
# Agents per alcohol-use status at the last tick.
ans <- agent_dt[tick == last_tick, .N, by = .(alc_use_status)]

# Share of agents in each status (0-1 scale), in the row order of `ans`.
prop <- ans[, N / sum(N)]

# Show the status column next to its share.
cbind(ans[, .(alc_use_status)], prop)