# Analyze incarceration data (this comes from the agent log; there is a separate incarceration log)

# NOTE: the workspace is deliberately NOT cleared with rm(list = ls()) here.
# That call only removes global objects (attached packages and options are
# left untouched) and would silently wipe the caller's workspace whenever this
# script is sourced. Every object used in the analysis below is assigned
# before it is read, so no clean workspace is required; restart R to get a
# truly fresh session.


# Load R environment ---------

# Activate the project's renv environment before any packages are attached.
renv::activate()


# Load packages ---------

library(here)
## here() starts at /users/akhann16/code/cadre/data-analysis-plotting/Simulated-Data-Analysis/r
library(data.table)
library(yaml)
library(ggplot2)


# Read RDS file ------------

# Build the path relative to the project root reported by here() (see the
# "here() starts at" line above) instead of hard-coding one user's absolute
# path, so the script runs unchanged from any checkout of the project.
incarceration_log_path <- here(
  "incarceration-log-analysis", "rds-outs", "incarceration_log_env.RDS"
)
incarceration_log_env <- readRDS(incarceration_log_path)

names(incarceration_log_env)
## [1] "incarceration_dt"
# Load data ------------

# `[[` returns NULL for a missing name, which would only surface later as a
# confusing error; fail here with a clear message instead.
stopifnot(
  "incarceration_dt not found in incarceration_log_env" =
    "incarceration_dt" %in% names(incarceration_log_env)
)
incarceration_dt <- incarceration_log_env[["incarceration_dt"]]


# Explore data ------------

# Column names and types of the incarceration table.
str(incarceration_dt)
## Classes 'data.table' and 'data.frame':   1778 obs. of  7 variables:
##  $ tick          : int  15 34 53 55 57 64 75 84 85 101 ...
##  $ id            : int  7950 4510 6285 1935 4137 8400 8969 6392 7352 8810 ...
##  $ age           : int  77 50 53 80 38 28 74 81 20 64 ...
##  $ race          : chr  "Hispanic" "Hispanic" "White" "White" ...
##  $ female        : int  0 0 0 0 0 1 0 0 0 0 ...
##  $ alc_use_status: int  1 0 3 0 1 0 1 3 1 2 ...
##  $ smoking_status: chr  "Current" "Current" "Current" "Never" ...
##  - attr(*, ".internal.selfref")=<externalptr>
# Rows x columns.
dim(incarceration_dt)
## [1] 1778    7
# Total number of rows in the table.
nrow(incarceration_dt)
## [1] 1778
# Analyze state distributions for all incarceration events ------------

# Total number of rows in the incarceration table. This is the shared
# denominator for every proportion in this section (previously it was computed
# but never used, and each table recomputed nrow(incarceration_dt) instead).
n_inc_events <- nrow(incarceration_dt)


# Count and share of all events by race.
incarceration_dt[, .(Count = .N, 
                     Proportion = .N / n_inc_events), 
                 by = .(race)]
##        race Count  Proportion
## 1: Hispanic   317 0.178290214
## 2:    White   375 0.210911136
## 3:    Black  1076 0.605174353
## 4:    Asian    10 0.005624297
# Count and share of all events by the `female` indicator.
incarceration_dt[, .(Count = .N, 
                     Proportion = .N / n_inc_events), 
                 by = .(female)]
##    female Count Proportion
## 1:      0  1743 0.98031496
## 2:      1    35 0.01968504
# Count and share of all events by alcohol-use status, sorted by status code
# so the table lines up with the first-time and repeat-event tables below.
incarceration_dt[, .(Count = .N, 
                     Proportion = .N / n_inc_events), 
                 by = .(alc_use_status)][
                   order(alc_use_status)
                 ]
##    alc_use_status Count Proportion
## 1:              0   420 0.23622047
## 2:              1   576 0.32395951
## 3:              2    71 0.03993251
## 4:              3   711 0.39988751
# Count and share of all events by smoking status.
incarceration_dt[, .(Count = .N, 
                     Proportion = .N / n_inc_events), 
                 by = .(smoking_status)]
##    smoking_status Count Proportion
## 1:        Current  1519 0.85433071
## 2:          Never   178 0.10011249
## 3:         Former    81 0.04555681
# Analyze state distributions for first-time incarceration events ------------

# Sort events by agent and tick, then keep the first row within each id group,
# i.e. every agent's earliest logged event.
first_incarceration_dt <- incarceration_dt[
  order(id, tick),
  head(.SD, 1L),
  by = id
]

head(first_incarceration_dt)
##    id tick age     race female alc_use_status smoking_status
## 1:  9 6753  64    White      0              0        Current
## 2: 12  199  50    Black      0              3        Current
## 3: 20  409  70 Hispanic      0              0        Current
## 4: 36 4259  35    White      0              1        Current
## 5: 45 8438  75    White      0              1          Never
## 6: 52 8448  83    Black      0              1        Current
dim(first_incarceration_dt)
## [1] 579   7
# One row per agent, so the row count is the denominator for the shares below.
n_first_inc_agents <- nrow(first_incarceration_dt)

# Count and share of first events by race.
first_incarceration_dt[
  , list(Count = .N, Proportion = .N / n_first_inc_agents),
  by = "race"
]
##        race Count Proportion
## 1:    White   256 0.44214162
## 2:    Black   195 0.33678756
## 3: Hispanic   120 0.20725389
## 4:    Asian     8 0.01381693
# Count and share of first events by the `female` indicator.
first_incarceration_dt[
  , list(Count = .N, Proportion = .N / n_first_inc_agents),
  by = "female"
]
##    female Count Proportion
## 1:      0   552 0.95336788
## 2:      1    27 0.04663212
# Count and share of first events by alcohol-use status, sorted by status code.
first_incarceration_dt[
  , list(Count = .N, Proportion = .N / n_first_inc_agents),
  by = "alc_use_status"
][order(alc_use_status)]
##    alc_use_status Count Proportion
## 1:              0   194 0.33506045
## 2:              1   267 0.46113990
## 3:              2    16 0.02763385
## 4:              3   102 0.17616580
# Count and share of first events by smoking status.
first_incarceration_dt[
  , list(Count = .N, Proportion = .N / n_first_inc_agents),
  by = "smoking_status"
]
##    smoking_status Count Proportion
## 1:        Current   391  0.6753022
## 2:          Never   150  0.2590674
## 3:         Former    38  0.0656304
# Analyze state distributions of repeat incarceration events ------------

# Sort events by agent and tick, then drop the first row within each id group
# so only second-and-later events remain; agents with a single event
# contribute no rows.
repeat_incarceration_dt <- incarceration_dt[
  order(id, tick),
  .SD[-1L],
  by = id
]
head(repeat_incarceration_dt, n = 100L)
##       id  tick age     race female alc_use_status smoking_status
##   1:  12   364  51    Black      0              3        Current
##   2:  12   510  51    Black      0              3        Current
##   3:  12   776  52    Black      0              3        Current
##   4:  12  1001  53    Black      0              3        Current
##   5:  12  1226  53    Black      0              3        Current
##   6:  12  1441  54    Black      0              3        Current
##   7:  12  1516  54    Black      0              3        Current
##   8:  12  1538  54    Black      0              3        Current
##   9:  12  1576  54    Black      0              3        Current
##  10:  12  1675  55    Black      0              3        Current
##  11:  12  1720  55    Black      0              3        Current
##  12:  12  1898  55    Black      0              3        Current
##  13:  12  1914  55    Black      0              3        Current
##  14:  12  1957  55    Black      0              3        Current
##  15:  12  2075  56    Black      0              3        Current
##  16:  12  2174  56    Black      0              3        Current
##  17:  12  2320  56    Black      0              3        Current
##  18:  12  2394  56    Black      0              3        Current
##  19:  12  2450  57    Black      0              3        Current
##  20:  12  2504  57    Black      0              3        Current
##  21:  12  2659  57    Black      0              3        Current
##  22:  12  2809  58    Black      0              3        Current
##  23:  12  2871  58    Black      0              3        Current
##  24:  12  3061  58    Black      0              3        Current
##  25:  12  3237  59    Black      0              3        Current
##  26:  12  3379  59    Black      0              3        Current
##  27:  12  3573  60    Black      0              3        Current
##  28:  12  3617  60    Black      0              3        Current
##  29:  12  5279  64    Black      0              3        Current
##  30:  12  5415  65    Black      0              3        Current
##  31:  12  5514  65    Black      0              3        Current
##  32:  12  5553  65    Black      0              3        Current
##  33:  12  5577  65    Black      0              3        Current
##  34:  12  5679  65    Black      0              3        Current
##  35:  12  5906  66    Black      0              3        Current
##  36:  12  6116  67    Black      0              3        Current
##  37:  12  6201  67    Black      0              3        Current
##  38:  12  6520  68    Black      0              3        Current
##  39:  12  7311  70    Black      0              3        Current
##  40:  12  7444  70    Black      0              3        Current
##  41:  12  7931  72    Black      0              3        Current
##  42:  12  8001  72    Black      0              3        Current
##  43:  12  8044  72    Black      0              3        Current
##  44:  12  8071  72    Black      0              3        Current
##  45:  12  8228  72    Black      0              3        Current
##  46:  12  8326  73    Black      0              3        Current
##  47:  12  8422  73    Black      0              3        Current
##  48:  12  8480  73    Black      0              3        Current
##  49:  12  8674  74    Black      0              3        Current
##  50:  12  8950  74    Black      0              3        Current
##  51:  12  9017  75    Black      0              3        Current
##  52:  12  9372  76    Black      0              3        Current
##  53:  12  9494  76    Black      0              3        Current
##  54:  12  9671  76    Black      0              3        Current
##  55:  12  9734  77    Black      0              3        Current
##  56:  12  9994  77    Black      0              3        Current
##  57:  12 10238  78    Black      0              3        Current
##  58:  12 10399  78    Black      0              3        Current
##  59:  20  1222  73 Hispanic      0              0        Current
##  60:  20  1282  73 Hispanic      0              0        Current
##  61:  36  4653  36    White      0              1        Current
##  62: 129 10640  55    White      0              3        Current
##  63: 129 10820  55    White      0              3        Current
##  64: 187  2341  55    Black      0              1        Current
##  65: 187  2758  56    Black      0              1        Current
##  66: 187  5428  63    Black      0              1        Current
##  67: 187  5686  64    Black      0              1        Current
##  68: 187 10904  78    Black      0              1        Current
##  69: 238  3807  43 Hispanic      0              1        Current
##  70: 359  4180  52    Black      0              1        Current
##  71: 359  4214  52    Black      0              1        Current
##  72: 359  4523  53    Black      0              1        Current
##  73: 359  4687  54    Black      0              1        Current
##  74: 359  4883  54    Black      0              1        Current
##  75: 359  5222  55    Black      0              1        Current
##  76: 374  6147  74 Hispanic      0              3          Never
##  77: 402  3842  73 Hispanic      0              1        Current
##  78: 402  3928  74 Hispanic      0              1        Current
##  79: 402  4756  76 Hispanic      0              1        Current
##  80: 402  5647  78 Hispanic      0              1        Current
##  81: 423   742  65    Black      0              3        Current
##  82: 423   857  65    Black      0              3        Current
##  83: 423   984  65    Black      0              3        Current
##  84: 423  1177  66    Black      0              3        Current
##  85: 423  1344  66    Black      0              3        Current
##  86: 423  1717  67    Black      0              3         Former
##  87: 423  1888  68    Black      0              3        Current
##  88: 423  2120  68    Black      0              3        Current
##  89: 423  2211  69    Black      0              3        Current
##  90: 423  2313  69    Black      0              3        Current
##  91: 423  2387  69    Black      0              3        Current
##  92: 423  2590  70    Black      0              3        Current
##  93: 423  2628  70    Black      0              3        Current
##  94: 423  2643  70    Black      0              3        Current
##  95: 423  2835  70    Black      0              3        Current
##  96: 423  2961  71    Black      0              3        Current
##  97: 423  3493  72    Black      0              3        Current
##  98: 423  3586  72    Black      0              3        Current
##  99: 423  3722  73    Black      0              3        Current
## 100: 423  3961  73    Black      0              3        Current
##       id  tick age     race female alc_use_status smoking_status
dim(repeat_incarceration_dt)
## [1] 1199    7
# Number of repeat events; denominator for every share in this section.
n_repeat_inc_events <- repeat_incarceration_dt[, .N]

# Count and share of repeat events by race.
repeat_incarceration_dt[
  , list(Count = .N, Proportion = .N / n_repeat_inc_events),
  by = "race"
]
##        race Count  Proportion
## 1:    Black   881 0.734778982
## 2: Hispanic   197 0.164303586
## 3:    White   119 0.099249374
## 4:    Asian     2 0.001668057
# Count and share of repeat events by the `female` indicator.
repeat_incarceration_dt[
  , list(Count = .N, Proportion = .N / n_repeat_inc_events),
  by = "female"
]
##    female Count  Proportion
## 1:      0  1191 0.993327773
## 2:      1     8 0.006672227
# Count and share of repeat events by alcohol-use status, sorted by status code.
repeat_incarceration_dt[
  , list(Count = .N, Proportion = .N / n_repeat_inc_events),
  by = "alc_use_status"
][order(alc_use_status)]
##    alc_use_status Count Proportion
## 1:              0   226 0.18849041
## 2:              1   309 0.25771476
## 3:              2    55 0.04587156
## 4:              3   609 0.50792327
# Count and share of repeat events by smoking status.
repeat_incarceration_dt[
  , list(Count = .N, Proportion = .N / n_repeat_inc_events),
  by = "smoking_status"
]
##    smoking_status Count Proportion
## 1:        Current  1128 0.94078399
## 2:          Never    28 0.02335279
## 3:         Former    43 0.03586322