We confirm below various attributes of the agents that are logged.

We will do this in data.table for speed.

rm(list=ls())
renv::load()
library(data.table)
library(ggplot2)
library(yaml)

Read input parameters:¬

input_params <- read_yaml("~/code/cadre/python/myparams/model_params.yaml")

Read the agent data into data-table:

agent_dt <- fread("~/code/cadre/python/output/agent_log_5.csv")
str(agent_dt)

## Classes 'data.table' and 'data.frame':   101041 obs. of  16 variables:
##  $ tick                        : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ id                          : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ age                         : int  20 43 33 24 63 29 63 21 41 80 ...
##  $ race                        : chr  "White" "White" "White" "White" ...
##  $ female                      : int  0 0 1 0 0 1 1 1 1 1 ...
##  $ alc_use_status              : int  1 1 1 2 3 1 1 1 1 1 ...
##  $ smoking_status              : chr  "Former" "Never" "Former" "Never" ...
##  $ last_incarceration_tick     : int  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ last_release_tick           : int  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ current_incarceration_status: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ entry_at_tick               : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ exit_at_tick                : int  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ n_incarcerations            : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ n_releases                  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ n_smkg_stat_trans           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ n_alc_use_stat_trans        : int  0 0 0 0 0 0 0 0 0 0 ...
##  - attr(*, ".internal.selfref")=<externalptr>

last_tick <- max(agent_dt$tick)

How many unique agents are in the dataset?

length(unique(agent_dt$id))

## [1] 1041

range(unique(agent_dt$id))

## [1]    0 1040

There are 1041 unique agents in the dataset. Their IDs range from 0 to 1040.

Demographics

agent_dt_by_race <- agent_dt[tick == last_tick, .N, by=race][,"per_cent":=round(N/sum(N)*100)][order(race)]
target_values <- data.frame(race = names(input_params$RACE_DISTRIBUTION), 
                            target_pct = unlist(input_params$RACE_DISTRIBUTION))

# Calculate the percentages
agent_dt_by_race[, .(count = .N), by = race][, percentage := round(count/sum(count) * 100, 2)]

# Order the data by race
agent_dt_by_race <- agent_dt_by_race[order(race)]

# Create the bar chart
ggplot(agent_dt_by_race, aes(x = race, y = per_cent, fill = race)) +
  geom_bar(stat = "identity", color = "black") +
  scale_fill_manual(values = c("#7bccc4", "#43a2ca", "#0868ac", "#f0f9e8")) +
  labs(title = "Race Distribution",
       x = "Race",
       y = "Percentage",
       fill = "Race")+
  geom_segment(data = target_values, 
               aes(x = race, xend = race, y = 0, yend = target_pct), 
               color = "red", size = 1, linetype = "dashed") +
  geom_text(data = target_values, 
            aes(x = race, y = target_pct*97, label = paste0(round(target_pct*100, 1), "%")),
            vjust = -0.5, size = 4, color = "red")

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.

agent_dt[tick==last_tick, .N, by=female][,"%":=N/sum(N)][order(female)]

female_target <- input_params$FEMALE_PROP

gender_pct <- agent_dt[tick == last_tick, .N, by = female][, "%":= N/sum(N)][order(female)]

# calculate percentages
gender_pct <- agent_dt[tick == last_tick, .N, by = female][, "%":= N/sum(N)][order(female)]

# read in YAML file with target value
female_target <- input_params$FEMALE_PROP

# calculate target percentages for each gender
female_target_pct <- female_target
male_target_pct <- 1 - female_target

# calculate actual percentages for each gender
female_actual_pct <- gender_pct[female == 1, `%`]
male_actual_pct <- gender_pct[female == 0, `%`]


# create plot
ggplot(gender_pct, aes(x = ifelse(female == 0, "Male", "Female"), y = `%`, fill = ifelse(female == 0, "Male", "Female"))) +
  geom_bar(stat = "identity", color = "black") +
  scale_fill_manual(values = c("#1b9e77", "#d95f02")) +
  annotate("text", x = 1, y = female_actual_pct, 
           label = paste0("Female Target = ", round(female_actual_pct*100, 1), "%"), 
           color = "#1b9e77", size = 4, vjust = 0) +
  annotate("text", x = 2, y = male_actual_pct, 
           label = paste0("Male Target = ", round(male_actual_pct*100, 1), "%"), 
           color = "#d95f02", size = 4, vjust = 0) +
  labs(title = "Agent Gender Distribution",
       x = "",
       y = "Percentage",
       fill = "") +
  theme_minimal()

agebreaks <- c(18, 25, 35, 45, 55, 65)
agelabels <- c("18-24", "25-34", "35-44", "45-54", "55-64")

agent_dt[tick == last_tick, .N, ]

## [1] 1000

setDT(agent_dt)[ , age_groups := cut(age, 
                                breaks = agebreaks, 
                                include.lowest = TRUE,
                                right = FALSE, 
                                labels = agelabels)]
nrow(agent_dt[tick == last_tick])

## [1] 1000

agent_dt[tick == last_tick, .N, by=c("age_groups", "race", "female")][order(age_groups, race, female)]

agent_dt[tick == last_tick, .N, by=c("race", "age_groups", "female")][order(race, age_groups)]

agent_dt[tick==last_tick, .N, by=c("race", "female")][,"%":=N/sum(N)*100][order(race)]

agent_dt[tick==last_tick, .N, by=c("age_groups")][,"%":=round(N/sum(N)*100)][order(age_groups)]

Median age at the start:

agent_dt[tick==1, median(age)]

## [1] 51

Median age at the end:

nrow(agent_dt[tick==last_tick])

## [1] 1000

agent_dt[tick==last_tick, median(age)]

## [1] 53

# Create age groups
agent_dt_by_age_group <- agent_dt[tick==last_tick, .N, by = .(age_group = cut(age, breaks = c(18, 25, 35, 45, 55, 65, 75, 80, 84)))]

# Create bar chart
ggplot(agent_dt_by_age_group, aes(x = age_group, y = N, fill = age_group)) +
  geom_bar(stat = "identity", color = "black") +
  scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33", "#a65628", "#f781bf")) +
  labs(title = "Agent Age Distribution",
       x = "Age Group",
       y = "Count",
       fill = "Age Group") +
  theme_minimal()

Baseline Smoking Behaviors

At the first tick:

agent_dt[tick==1 & race == "Black" & female == 0, .N, by=c("smoking_status")][]

agent_dt[tick==1 & race == "Black" & female == 0, .N, by=c("smoking_status")][,"sim_prop":= round(N/sum(N), 3)][]

# targets
BLACK_MALE_CURRENT = input_params$SMOKING_PREV$BLACK_MALE_CURRENT
BLACK_MALE_FORMER = input_params$SMOKING_PREV$BLACK_MALE_FORMER
BLACK_MALE_NEVER = input_params$SMOKING_PREV$BLACK_MALE_NEVER

smkg_prev_black_men_target <- c(BLACK_MALE_CURRENT, BLACK_MALE_FORMER, BLACK_MALE_NEVER)

smkg_prev_black_men_target

## [1] 0.205 0.473 0.322

## Add difference between the two target and the sim

At the last tick:

smkg_blackmen_last_tick <- agent_dt[tick==last_tick & race == "Black" & female == 0, .N, by=c("smoking_status")][,"prop":=round(N/sum(N), 3)][]
smkg_blackmen_last_tick

smkg_prev_black_men_target

## [1] 0.205 0.473 0.322

# Count the number and percentage of each smoking status
smoking_dt <- agent_dt[tick == last_tick, .N, by = smoking_status][, `:=` (percentage = round(N/sum(N)*100, 2))]
# Create the bar chart
ggplot(smoking_dt, aes(x = smoking_status, y = percentage, fill = smoking_status)) +
  geom_bar(stat = "identity", color = "black") +
  scale_fill_manual(values = c("#fec44f", "#43a2ca", "#7bccc4")) +
  labs(title = "Agent Smoking Status Distribution",
       x = "Smoking Status",
       y = "Percentage",
       fill = "Smoking Status") +
  theme_minimal()

Baseline Alcohol Use

alc_use_last_tick <- 
  agent_dt[tick == last_tick, .N, by = c("alc_use_status")][, 
  "%" := round(N /sum(N), 3)][
  order(alc_use_status)][
  ]

# targets
alc_use_props <- input_params$ALC_USE_PROPS
alc_use_targets <- c(alc_use_props$A, alc_use_props$O, alc_use_props$R, alc_use_props$D)

cbind(alc_use_last_tick[["%"]], alc_use_targets)

##            alc_use_targets
## [1,] 0.073           0.083
## [2,] 0.733           0.729
## [3,] 0.134           0.132
## [4,] 0.060           0.056

# Calculate the percentages for alcohol use status
alc_use_dt <- agent_dt[tick == last_tick, .N, by = alc_use_status][, `:=` (percentage = round(N/sum(N)*100, 2))]
# Order the data table by alcohol use status
setorder(alc_use_dt, alc_use_status)
# Rename the alcohol use status codes to descriptive labels
alc_use_dt[, alc_use_status := factor(alc_use_status, levels = c(0, 1, 2, 3), 
                                      labels = c("Lifetime Abstainer", 
                                                 "Ocassional", 
                                                 "Regular", 
                                                 "Heavy"))]

ggplot(alc_use_dt, aes(x = alc_use_status, y = percentage, fill = alc_use_status)) +
  geom_bar(stat = "identity", color = "black") +
  labs(title = "Agent Alcohol Use Status Distribution",
       x = "Use Frequency",
       y = "Percentage",
       fill = "Alcohol Use Status") +
  theme_minimal() +
  guides(fill = "none") +
  geom_text(aes(label = paste0(as.numeric(input_params$ALC_USE_PROPS)*100, "%")), 
            position = position_stack(vjust = 1.02))+
  scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, 10))

Smoking and Alcohol Co-Use

# Create a contingency table of smoking and alcohol use status
smoking_alc_use_table <- dcast(agent_dt[tick == last_tick], smoking_status ~ alc_use_status, value.var = "id", fun.aggregate = length)
smoking_alc_use_table <- smoking_alc_use_table[, .(smoking_status, `Lifetime Abstainer` = `0`, Occasional = `1`, Regular = `2`, Heavy = `3`)]
smoking_alc_use_melt <- melt(smoking_alc_use_table, id.vars = "smoking_status",
                             variable.name = "alcohol_use")

smkg_alc_use_xtab <- dcast(smoking_alc_use_melt, smoking_status ~ alcohol_use, value.var = "value")
smkg_alc_use_xtab

smoking_alc_col_props <- prop.table(as.matrix(smkg_alc_use_xtab[,-1]), margin = 1)
smoking_alc_row_props <- prop.table(as.matrix(smkg_alc_use_xtab[,-1]), margin = 2)

smoking_alc_col_props <- cbind(smkg_alc_use_xtab[,1], round(smoking_alc_col_props, 3))
smoking_alc_row_props <- cbind(smkg_alc_use_xtab[,1], round(smoking_alc_row_props, 3))

print(smoking_alc_col_props)

##    smoking_status Lifetime Abstainer Occasional Regular Heavy
## 1:        Current              0.047      0.812   0.121 0.020
## 2:         Former              0.083      0.707   0.131 0.079
## 3:          Never              0.072      0.731   0.142 0.056

print(smoking_alc_row_props)

##    smoking_status Lifetime Abstainer Occasional Regular Heavy
## 1:        Current              0.096      0.165   0.134  0.05
## 2:         Former              0.479      0.405   0.410  0.55
## 3:          Never              0.425      0.430   0.455  0.40

rowSums(smoking_alc_col_props[,-1])

## [1] 1.000 1.000 1.001

colSums(smoking_alc_row_props[,-1])

## Lifetime Abstainer         Occasional            Regular              Heavy 
##              1.000              1.000              0.999              1.000

# Create a stacked bar chart of smoking and alcohol use status
ggplot(smoking_alc_use_melt, aes(x = smoking_status, y = value, fill = alcohol_use)) +
  geom_bar(stat = "identity", color = "black") +
  labs(title = "Agent Smoking and Alcohol Use Status Distribution",
       x = "Smoking Status",
       y = "Count",
       fill = "Alcohol Use Status") +
  theme_minimal() +
  scale_fill_manual(values = c("#a6cee3", "#1f78b4", "#b2df8a", "#33a02c")) +
  guides(fill = "none")

# Create a contingency table of smoking and alcohol use status
smoking_alc_use_table <- dcast(agent_dt[tick == last_tick], smoking_status ~ alc_use_status, value.var = "id", fun.aggregate = length)
smoking_alc_use_table <- smoking_alc_use_table[, .(smoking_status, `Lifetime Abstainer` = `0`, Occasional = `1`, Regular = `2`, Heavy = `3`)]
smoking_alc_use_melt <- melt(smoking_alc_use_table, id.vars = "smoking_status")

# Create a stacked bar chart of smoking and alcohol use status
ggplot(smoking_alc_use_melt, aes(x = smoking_status, y = value, fill = variable)) +
  geom_bar(stat = "identity", color = "black") +
  labs(title = "Agent Smoking and Alcohol Use Status Distribution",
       x = "Smoking Status",
       y = "Count") +
  scale_fill_manual(values = c("#a6cee3", "#1f78b4", "#b2df8a", "#33a02c"), 
                    labels = c("Lifetime Abstainer", "Occasional", "Regular", "Heavy"),
                    name = "Alcohol Use Status") +
  theme_minimal()

# Age Assignment at Entry

We check below the ages of new agents, and if their initial age assignments and age increments are working correctly.

How many agents are present at the last tick:

agent_dt[tick == last_tick, .N]

## [1] 1000

How many agents present at tick 991 entered after model initialization?

agent_dt[tick == 991 & id > 9999, .N]

## [1] 0

The mean age of agents at time by time of entry at tick 1:

agent_dt[tick == 1, 
         .("mean_age_today"=mean(age), .N), 
                        by=.(entry_at_tick),
         ]

At tick 491:

agent_dt[tick == 491, 
         .("mean_age_today"=mean(age), .N), 
                        by=.(entry_at_tick),
         ]

And at tick 991:

agent_dt[tick == 991, 
         .("mean_age_today"=mean(age), .N), 
                        by=.(entry_at_tick),
         ]

These data seem to match expectations of aging of newly entering agents. See that at tick 1, only one agent had entered after model initialization. and this agent was 18 years old.

At tick 491, this agent was 19 years old (491-365 years older than 18, rounded), and at tick 991, this agent was 21 years old (991-365 years older than 18, rounded).

So, the aging process seems to work as expected.

Incarceration Statistics

Number and proportion of incarcerated persons at last tick:

agent_dt[tick==last_tick, .N, by=c("current_incarceration_status")][,"prop":=round(N/sum(N)*100, 0)][]

Break down number and proportions of incarcerated persons at last tick by race, sex and age:

agent_dt[tick==last_tick & current_incarceration_status == 1, 
         .N, 
         by=c("race")][,"prop":=round(N/sum(N)*100, 0)][]

agent_dt[tick==last_tick & current_incarceration_status == 1, 
         .N, 
         by=c("female")][,"prop":=round(N/sum(N)*100, 0)][]

agent_dt[tick==last_tick & current_incarceration_status == 1, 
         .N, 
         by=c("age_groups")][,"prop":=round(N/sum(N)*100, 0)][]

agent_dt[tick==last_tick & current_incarceration_status == 1, 
         .N, 
         by=c("race", "female", "age_groups")][order(race, female, age_groups)][,"prop":=round(N/sum(N)*100, 0)][]

How many persons have been ever incarcerated?

To answer this question, we can group the last incarceration times by agent ID. If last incarceration time is -1, the person has never been incarcerated. If last incarceration time is > -1, the person has been incarcerated at least once.

# Group data by agent ID and compute the last incarceration time
last_incarceration_time <- agent_dt[, 
                                    .(last_incarceration_time = max(last_incarceration_tick)), 
                                    by = id]

# Count the number of agents who have been incarcerated at least once
n_ever_incarcerated <- sum(last_incarceration_time$last_incarceration_time != -1)

# Compute the proportion of agents who have been incarcerated at least once
prop_ever_incarcerated <- n_ever_incarcerated / nrow(last_incarceration_time)


# Agents who have been incarcerated at least once
ever_incarcerated_times <- last_incarceration_time[last_incarceration_time > 1]
dim(ever_incarcerated_times)

## [1] 9 2

# Extract demographic information for ever incarcerated agents
ever_incarcerated_ids <-
  unique(agent_dt[id %in% ever_incarcerated_times$id, id])
ever_incarcerated_info <- agent_dt[id %in% ever_incarcerated_ids,
                                   .SD[.N, list(
                                     id,
                                     age = last(age),
                                     race = last(race),
                                     female = last(female)
                                   )],
                                   by = id]

# Compute distributions
## mean age
age_summary <- ever_incarcerated_info[, .(mean_age=mean(age),
                                       min_age=min(age),
                                       max_age=max(age)
                                       )
                                   ]
sex_dist <-
  ever_incarcerated_info[, .N, by = female][,  .(
    Sex = c("Female", "Male"),
    Count = c(N[female == 0], N[female == 1]),
    "Proportion of ever incarcerated" = round(N / sum(N), 2)
  )]
print(sex_dist)

##       Sex Count Proportion of ever incarcerated
## 1: Female     2                            0.78
## 2:   Male     7                            0.22

race_dist <- ever_incarcerated_info[, .N, by = race][,
  .(Race = race, 
    Count = N,
    Proportion = round(N / sum(N), 2)
   )
  ]
race_dist[]

Thus, 9 persons have been incarcerated at least once. The proportion of persons who have been ever incarcerated is 0.0086455.

Now let us compute the number of females and males whoa have ever been incarcerated, and the proportions by race (% of White, Black, …)

first_tick_race_sex <-
  agent_dt[, .SD[1], by = id][, .(
    ID = id,
    Tick = tick,
    Female = female,
    Race = race
  )]

# Count number of females and males
sex_count <- first_tick_race_sex[, .N, by = Female][,
                                               .(Sex = c("Female", "Male"), 
                                                 Count = c(N[Female == 1], N[Female == 0]))                                               ]
print(sex_count)

##       Sex Count
## 1: Female   548
## 2:   Male   493

# Count number of individuals in each race
race_count <- first_tick_race_sex[, .N, by = Race][,
                                                   .(Race, Count = N)]
print(race_count)

##        Race Count
## 1:    White   764
## 2:    Asian    38
## 3: Hispanic   166
## 4:    Black    73

Thus, the distribution of ever incarcerated persons by sex is

cbind(sex_count$Sex, round(as.numeric(sex_dist$Count/sex_count$Count), 3))

##      [,1]     [,2]   
## [1,] "Female" "0.004"
## [2,] "Male"   "0.014"

The distribution of ever incarcerated persons by race is:

cbind(race_count$Race, round(as.numeric(race_dist$Count/race_count$Count), 3))

## Warning in race_dist$Count/race_count$Count: longer object length is not a
## multiple of shorter object length

##      [,1]       [,2]   
## [1,] "White"    "0.001"
## [2,] "Asian"    "0.053"
## [3,] "Hispanic" "0.036"
## [4,] "Black"    "0.014"

What is the duration of incarceration for persons who have been ever incarcerated? Restricting our data to only persons who have been released from their last incarceration episode, we can estimate this as:

# Filter the data to include only the observations for agents who have been ever incarcerated and released
ever_incarcerated_released <-
  agent_dt[last_release_tick > last_incarceration_tick,
           .SD[1, ], # ensure times for each incarceration, release episode appear only once
           by = .(id, last_incarceration_tick)]

# Compute the length of incarceration time for each ever incarcerated and released agent
ever_incarcerated_released[, incarceration_length := last_release_tick - last_incarceration_tick,
                           by = id]

# Compute the summary statistics of the lengths of incarceration time for the ever incarcerated and released agents
incarceration_summary <-
  ever_incarcerated_released[, .(
    min_incarceration_length = min(incarceration_length),
    median_incarceration_length = median(incarceration_length),
    max_incarceration_length = max(incarceration_length)
  ),
  by = id]

# Compute the distribution of incarceration times for all incarceration episodes followed by release
summary(ever_incarcerated_released$incarceration_length)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   10.00   11.00   53.00   95.11  118.00  362.00

Assess the total number of incarceration-release events and the number of unique agents who experienced them:

length(ever_incarcerated_released$id)

## [1] 9

length(unique(ever_incarcerated_released$id))

## [1] 8

Distribution of incarceration events:

hist(ever_incarcerated_released$incarceration_length, 
     breaks = seq(0, 3000, by = 30),
     xlab = "Length of incarceration (days)",
     ylab = "Number of individuals",
     main = "Distribution of incarceration lengths")

Break down number and proportions of persons ever incarcerated at last tick by race, sex and age:

ever_inc_dt <- 
  agent_dt[tick==last_incarceration_tick & n_incarcerations > 0, 
           .N, 
           by=c("race", "female", "age_groups")][order(race, female, age_groups)][
             ,"prop":=round(N/sum(N)*100, 0)][]

ever_inc_dt

ever_inc_dt[, colSums(.SD), .SDcols = 4:5]

##    N prop 
##    2  100

Mean number of incarcerations at last tick:

agent_dt[tick == last_tick, 
         .(
           "mean_n_incarcerations" = mean(n_incarcerations),
           "min_n_incarcerations" = min(n_incarcerations),
           "max_n_incarcerations" = max(n_incarcerations),
           "median_n_incarcerations" = median(n_incarcerations),
           "n_ever_incarcerated_agents"=sum(n_incarcerations > 0),
         .N)]

Incarceration Duration

Compute statistics on agents whose last release date is after their last incarceration date.

agent_dt[tick == last_tick & last_release_tick >= last_incarceration_tick, 
         .(
           "mean_duration" = mean(last_release_tick - last_incarceration_tick),
           "max_duration" = max(last_release_tick - last_incarceration_tick),
           "min_duration" = min(last_release_tick - last_incarceration_tick),
           "median_duration" = median(last_release_tick - last_incarceration_tick),
         .N)]

Release Statistics

Number

agent_dt[tick == last_tick, 
         .(
           "mean_n_releases" = mean(n_releases),
           "min_n_releases" = min(n_releases),
           "max_n_releases" = max(n_releases),
           "median_n_releases" = median(n_releases),
           "n_ever_released_agents"=sum(n_releases > 0),
         .N)]

Duration

Compute statistics on agents whose last release date is *before* their last incarceration date.

agent_dt[tick == last_tick & last_release_tick < last_incarceration_tick, 
         .(
           "mean_rel_dur" = mean(abs(last_release_tick - last_incarceration_tick)),
           "max_rel_dur" = max(abs(last_release_tick - last_incarceration_tick)),
           "min_rel_dur" = min(abs(last_release_tick - last_incarceration_tick)),
           "median_rel_dur" = median(abs(last_release_tick - last_incarceration_tick)),
         .N)]

Smoking transition behavior

agent_dt[tick == last_tick, 
         .(
           "mean_n_smkg_trans" = mean(n_smkg_stat_trans),
           "max_n_smkg_trans" = max(n_smkg_stat_trans),
           "min_n_smkg_trans" = min(n_smkg_stat_trans),
           "median_n_smkg_trans" = median(n_smkg_stat_trans),
         .N)]

Smoking State Distributions:

ans <- agent_dt[tick == last_tick, .N, by=c("race", "smoking_status")][order(race)]
ans

ans[, prop := N / sum(N), by = race][]

ans_num_smokers <- ans[smoking_status %in% c("Former", "Current", "Never"),
                       .(num_smokers = sum(N)),
                       by = race]
ans_num_smokers

sum(ans_num_smokers$num_smokers)

## [1] 1000

sim_prop_smokers_by_race <- ans_num_smokers$num_smokers/sum(ans_num_smokers$num_smokers)

Alcohol transition behavior

agent_dt[tick == last_tick, 
         .(
           "mean_n_alc_trans" = mean(n_alc_use_stat_trans),
           "max_n_alc_trans" = max(n_alc_use_stat_trans),
           "min_n_alc_trans" = min(n_alc_use_stat_trans),
           "median_n_alc_trans" = median(n_alc_use_stat_trans),
         .N)]

Alcohol Use State Distributions:

ans <- agent_dt[tick == last_tick, .N, by=alc_use_status]
prop <- ans$N/sum(ans$N)
cbind(ans[,1], prop)

Smoking and Alcohol Use among Incarcerated Persons vs Not-

Smoking

Proportion of current smokers among persons who have been released 0 times.

agent_dt[tick == last_tick, .N, by = n_releases]

agent_dt[tick == last_tick, .N, by = smoking_status]

current_smoking_by_release_dcast <- dcast(agent_dt[tick == last_tick], smoking_status ~ n_releases, value.var = "id", fun.aggregate = length)

## Proportion of current smokers among persons who have been released 0 times:
current_smoking_by_release_dcast[smoking_status == "Current", sum(`0`)] / sum(current_smoking_by_release_dcast$`0`)

## [1] 0.1491935

Proportion of current smokers among persons who have been released at least once:

n_smokers_release_once <- apply(current_smoking_by_release_dcast[,c(-1, -2)], 1, sum)
n_smokers_release_once[1]/sum(n_smokers_release_once)

## [1] 0.125

What proportion of current or former smokers are currently smoking in the group where n_releases = 0 vs. the group n_releases > 0?

n_releases_0 <- current_smoking_by_release_dcast[,2]
#n_releases_ge1 <- current_smoking_by_release_dcast[,3:6]

n_releases_0[1]/sum(n_releases_0[1:2])

#sum(n_releases_ge1[1,])/sum(n_releases_ge1[1:2,])

Alcohol Use

agent_dt[tick == last_tick, .N, by = n_releases][order(n_releases)]

agent_dt[tick == last_tick, .N, by = alc_use_status][order(alc_use_status)]

alc_use_by_release_dcast <- dcast(agent_dt[tick==last_tick,], 
                                          alc_use_status ~ n_releases, 
                                          value.var = "id", 
                                          fun.aggregate = length)
alc_use_by_release_dcast

What is the proportion of persons with alcohol use disorder in the n_releases = 0 group?

alc_use_by_release_dcast[4,2]/sum(alc_use_by_release_dcast[,2])

What is the proportion of persons with alcohol use disorder in the n_releases > 0 group?

#alc_use_by_release_ge1 <- alc_use_by_release_dcast[,c(-1,-2)]
#sum(alc_use_by_release_ge1[,4])/sum(alc_use_by_release_ge1)

R Notebook

17 April, 2023