# Analyze incarceration data (this comes from the agent log; there is a separate incarceration log)
# NOTE: the former `rm(list = ls())` call was removed. It only deletes visible
# objects in the global environment -- attached packages, options, and hidden
# objects survive -- so it does not give a clean session, and it silently wipes
# the workspace of anyone who source()s this file. Restart R for a fresh
# session instead.
# Load R environment ---------
# Activate the project's renv environment before any packages are attached.
renv::activate()
# Load packages ---------
library(here)
## here() starts at /users/akhann16/code/cadre/data-analysis-plotting/Simulated-Data-Analysis/r
library(data.table)
library(yaml)
library(ggplot2)
# Read RDS file ------------
# Build the path relative to the project root reported by here() rather than
# hard-coding one user's absolute path, so the script runs from any checkout.
incarceration_log_path <- here(
  "incarceration-log-analysis", "rds-outs", "incarceration_log_env.RDS"
)
# Fail early with a readable message instead of a bare readRDS() error.
if (!file.exists(incarceration_log_path)) {
  stop("Incarceration log RDS not found: ", incarceration_log_path,
       call. = FALSE)
}
incarceration_log_env <- readRDS(incarceration_log_path)
names(incarceration_log_env)
## [1] "incarceration_dt"
# Load data ------------
incarceration_dt <- incarceration_log_env[["incarceration_dt"]]
# `[[` yields NULL for a missing element; stop here rather than letting a NULL
# propagate into the analysis below.
stopifnot(is.data.table(incarceration_dt))
# Explore data ------------
# Column names, storage types, and a preview of the first values.
str(incarceration_dt)
## Classes 'data.table' and 'data.frame': 1778 obs. of 7 variables:
## $ tick : int 15 34 53 55 57 64 75 84 85 101 ...
## $ id : int 7950 4510 6285 1935 4137 8400 8969 6392 7352 8810 ...
## $ age : int 77 50 53 80 38 28 74 81 20 64 ...
## $ race : chr "Hispanic" "Hispanic" "White" "White" ...
## $ female : int 0 0 0 0 0 1 0 0 0 0 ...
## $ alc_use_status: int 1 0 3 0 1 0 1 3 1 2 ...
## $ smoking_status: chr "Current" "Current" "Current" "Never" ...
## - attr(*, ".internal.selfref")=<externalptr>
# Table size as (rows, columns).
dim(incarceration_dt)
## [1] 1778 7
# Row count on its own.
nrow(incarceration_dt)
## [1] 1778
# Analyze state distributions for all incarceration events ------------
# Total number of logged incarceration events (each row of the table is counted
# as one event). Computed once and reused as the denominator of every
# proportion below, mirroring the first-time and repeat-event sections.
n_inc_events <- nrow(incarceration_dt)
# Share of events by race.
incarceration_dt[, .(Count = .N,
                     Proportion = .N / n_inc_events),
                 by = .(race)]
## race Count Proportion
## 1: Hispanic 317 0.178290214
## 2: White 375 0.210911136
## 3: Black 1076 0.605174353
## 4: Asian 10 0.005624297
# Share of events by the `female` indicator.
incarceration_dt[, .(Count = .N,
                     Proportion = .N / n_inc_events),
                 by = .(female)]
## female Count Proportion
## 1: 0 1743 0.98031496
## 2: 1 35 0.01968504
# Share of events by alcohol-use status, sorted by status code so the table
# lines up with the first-time and repeat-event tables.
incarceration_dt[, .(Count = .N,
                     Proportion = .N / n_inc_events),
                 by = .(alc_use_status)][
  order(alc_use_status)
]
## alc_use_status Count Proportion
## 1: 0 420 0.23622047
## 2: 1 576 0.32395951
## 3: 2 71 0.03993251
## 4: 3 711 0.39988751
# Share of events by smoking status.
incarceration_dt[, .(Count = .N,
                     Proportion = .N / n_inc_events),
                 by = .(smoking_status)]
## smoking_status Count Proportion
## 1: Current 1519 0.85433071
## 2: Never 178 0.10011249
## 3: Former 81 0.04555681
# Analyze state distributions for first-time incarceration events ------------
# Keep each agent's earliest logged event: sort by agent id and tick, then take
# the first row within every id group.
first_incarceration_dt <- incarceration_dt[order(id, tick)][
  , .SD[1L], by = id
]
head(first_incarceration_dt)
## id tick age race female alc_use_status smoking_status
## 1: 9 6753 64 White 0 0 Current
## 2: 12 199 50 Black 0 3 Current
## 3: 20 409 70 Hispanic 0 0 Current
## 4: 36 4259 35 White 0 1 Current
## 5: 45 8438 75 White 0 1 Never
## 6: 52 8448 83 Black 0 1 Current
dim(first_incarceration_dt)
## [1] 579 7
# The table holds one row per id, so its row count is the number of agents
# with at least one logged event; it is the denominator for the shares below.
n_first_inc_agents <- nrow(first_incarceration_dt)
# Race distribution at first incarceration.
first_incarceration_dt[
  ,
  list(Count = .N, Proportion = .N / n_first_inc_agents),
  by = "race"
]
## race Count Proportion
## 1: White 256 0.44214162
## 2: Black 195 0.33678756
## 3: Hispanic 120 0.20725389
## 4: Asian 8 0.01381693
# Distribution of the `female` indicator at first incarceration.
first_incarceration_dt[
  ,
  list(Count = .N, Proportion = .N / n_first_inc_agents),
  by = "female"
]
## female Count Proportion
## 1: 0 552 0.95336788
## 2: 1 27 0.04663212
# Alcohol-use status at first incarceration, sorted by status code.
first_incarceration_dt[
  ,
  list(Count = .N, Proportion = .N / n_first_inc_agents),
  by = "alc_use_status"
][order(alc_use_status)]
## alc_use_status Count Proportion
## 1: 0 194 0.33506045
## 2: 1 267 0.46113990
## 3: 2 16 0.02763385
## 4: 3 102 0.17616580
# Smoking status at first incarceration.
first_incarceration_dt[
  ,
  list(Count = .N, Proportion = .N / n_first_inc_agents),
  by = "smoking_status"
]
## smoking_status Count Proportion
## 1: Current 391 0.6753022
## 2: Never 150 0.2590674
## 3: Former 38 0.0656304
# Analyze state distributions of repeat incarceration events ------------
# Every event after an agent's first logged one counts as a repeat: sort by
# agent id and tick, then drop the first row of each id group. Agents with a
# single event contribute no rows.
repeat_incarceration_dt <- incarceration_dt[order(id, tick)][
  , .SD[-1L], by = id
]
# Preview the first 100 repeat events.
head(repeat_incarceration_dt, n = 100L)
## id tick age race female alc_use_status smoking_status
## 1: 12 364 51 Black 0 3 Current
## 2: 12 510 51 Black 0 3 Current
## 3: 12 776 52 Black 0 3 Current
## 4: 12 1001 53 Black 0 3 Current
## 5: 12 1226 53 Black 0 3 Current
## 6: 12 1441 54 Black 0 3 Current
## 7: 12 1516 54 Black 0 3 Current
## 8: 12 1538 54 Black 0 3 Current
## 9: 12 1576 54 Black 0 3 Current
## 10: 12 1675 55 Black 0 3 Current
## 11: 12 1720 55 Black 0 3 Current
## 12: 12 1898 55 Black 0 3 Current
## 13: 12 1914 55 Black 0 3 Current
## 14: 12 1957 55 Black 0 3 Current
## 15: 12 2075 56 Black 0 3 Current
## 16: 12 2174 56 Black 0 3 Current
## 17: 12 2320 56 Black 0 3 Current
## 18: 12 2394 56 Black 0 3 Current
## 19: 12 2450 57 Black 0 3 Current
## 20: 12 2504 57 Black 0 3 Current
## 21: 12 2659 57 Black 0 3 Current
## 22: 12 2809 58 Black 0 3 Current
## 23: 12 2871 58 Black 0 3 Current
## 24: 12 3061 58 Black 0 3 Current
## 25: 12 3237 59 Black 0 3 Current
## 26: 12 3379 59 Black 0 3 Current
## 27: 12 3573 60 Black 0 3 Current
## 28: 12 3617 60 Black 0 3 Current
## 29: 12 5279 64 Black 0 3 Current
## 30: 12 5415 65 Black 0 3 Current
## 31: 12 5514 65 Black 0 3 Current
## 32: 12 5553 65 Black 0 3 Current
## 33: 12 5577 65 Black 0 3 Current
## 34: 12 5679 65 Black 0 3 Current
## 35: 12 5906 66 Black 0 3 Current
## 36: 12 6116 67 Black 0 3 Current
## 37: 12 6201 67 Black 0 3 Current
## 38: 12 6520 68 Black 0 3 Current
## 39: 12 7311 70 Black 0 3 Current
## 40: 12 7444 70 Black 0 3 Current
## 41: 12 7931 72 Black 0 3 Current
## 42: 12 8001 72 Black 0 3 Current
## 43: 12 8044 72 Black 0 3 Current
## 44: 12 8071 72 Black 0 3 Current
## 45: 12 8228 72 Black 0 3 Current
## 46: 12 8326 73 Black 0 3 Current
## 47: 12 8422 73 Black 0 3 Current
## 48: 12 8480 73 Black 0 3 Current
## 49: 12 8674 74 Black 0 3 Current
## 50: 12 8950 74 Black 0 3 Current
## 51: 12 9017 75 Black 0 3 Current
## 52: 12 9372 76 Black 0 3 Current
## 53: 12 9494 76 Black 0 3 Current
## 54: 12 9671 76 Black 0 3 Current
## 55: 12 9734 77 Black 0 3 Current
## 56: 12 9994 77 Black 0 3 Current
## 57: 12 10238 78 Black 0 3 Current
## 58: 12 10399 78 Black 0 3 Current
## 59: 20 1222 73 Hispanic 0 0 Current
## 60: 20 1282 73 Hispanic 0 0 Current
## 61: 36 4653 36 White 0 1 Current
## 62: 129 10640 55 White 0 3 Current
## 63: 129 10820 55 White 0 3 Current
## 64: 187 2341 55 Black 0 1 Current
## 65: 187 2758 56 Black 0 1 Current
## 66: 187 5428 63 Black 0 1 Current
## 67: 187 5686 64 Black 0 1 Current
## 68: 187 10904 78 Black 0 1 Current
## 69: 238 3807 43 Hispanic 0 1 Current
## 70: 359 4180 52 Black 0 1 Current
## 71: 359 4214 52 Black 0 1 Current
## 72: 359 4523 53 Black 0 1 Current
## 73: 359 4687 54 Black 0 1 Current
## 74: 359 4883 54 Black 0 1 Current
## 75: 359 5222 55 Black 0 1 Current
## 76: 374 6147 74 Hispanic 0 3 Never
## 77: 402 3842 73 Hispanic 0 1 Current
## 78: 402 3928 74 Hispanic 0 1 Current
## 79: 402 4756 76 Hispanic 0 1 Current
## 80: 402 5647 78 Hispanic 0 1 Current
## 81: 423 742 65 Black 0 3 Current
## 82: 423 857 65 Black 0 3 Current
## 83: 423 984 65 Black 0 3 Current
## 84: 423 1177 66 Black 0 3 Current
## 85: 423 1344 66 Black 0 3 Current
## 86: 423 1717 67 Black 0 3 Former
## 87: 423 1888 68 Black 0 3 Current
## 88: 423 2120 68 Black 0 3 Current
## 89: 423 2211 69 Black 0 3 Current
## 90: 423 2313 69 Black 0 3 Current
## 91: 423 2387 69 Black 0 3 Current
## 92: 423 2590 70 Black 0 3 Current
## 93: 423 2628 70 Black 0 3 Current
## 94: 423 2643 70 Black 0 3 Current
## 95: 423 2835 70 Black 0 3 Current
## 96: 423 2961 71 Black 0 3 Current
## 97: 423 3493 72 Black 0 3 Current
## 98: 423 3586 72 Black 0 3 Current
## 99: 423 3722 73 Black 0 3 Current
## 100: 423 3961 73 Black 0 3 Current
## id tick age race female alc_use_status smoking_status
dim(repeat_incarceration_dt)
## [1] 1199 7
# Number of repeat events; denominator for every share below.
n_repeat_inc_events <- repeat_incarceration_dt[, .N]
# Race distribution across repeat events.
repeat_incarceration_dt[
  ,
  list(Count = .N, Proportion = .N / n_repeat_inc_events),
  by = "race"
]
## race Count Proportion
## 1: Black 881 0.734778982
## 2: Hispanic 197 0.164303586
## 3: White 119 0.099249374
## 4: Asian 2 0.001668057
# Distribution of the `female` indicator across repeat events.
repeat_incarceration_dt[
  ,
  list(Count = .N, Proportion = .N / n_repeat_inc_events),
  by = "female"
]
## female Count Proportion
## 1: 0 1191 0.993327773
## 2: 1 8 0.006672227
# Alcohol-use status across repeat events, sorted by status code.
repeat_incarceration_dt[
  ,
  list(Count = .N, Proportion = .N / n_repeat_inc_events),
  by = "alc_use_status"
][order(alc_use_status)]
## alc_use_status Count Proportion
## 1: 0 226 0.18849041
## 2: 1 309 0.25771476
## 3: 2 55 0.04587156
## 4: 3 609 0.50792327
# Smoking status across repeat events.
repeat_incarceration_dt[
  ,
  list(Count = .N, Proportion = .N / n_repeat_inc_events),
  by = "smoking_status"
]
## smoking_status Count Proportion
## 1: Current 1128 0.94078399
## 2: Never 28 0.02335279
## 3: Former 43 0.03586322