Risk Data: Main VARS: SuicideRisk_Total / HomicideRisk_Total

latest_risk_by_client: data frame of each client's most recently assessed risk levels

variables: StudyClientId, AssessmentYear, AssessmentType, SuicideRisk_Total, HomicideRisk_Total

Take a look at missing values of ChargeSeverity –> non-labeled offense class

TO DO: do we need to assign a new class to these offenses or do we just treat it as a binary variable instead (have offense history = 1, not have = 0)?

NOTE: the Offense Data DOES NOT have ClientID or anything else that could be used to reference another dataset. We might need to discard it.

Alcohol Drug Problem & Criminal History Data

Incident Data: Main VAR: IncidentScore (from Incident_byclient dataset)

All types of incidents are:

## IncidentDaTa$IncidentType 
##                                                       Frequency  Percent
## Aggression/Threatening Behaviors (Not to Person) [II]        24   5.7554
## Client Arrest [III]                                          60  14.3885
## Contraband Found [III]                                       89  21.3429
## Death [I]                                                    19   4.5564
## Narcan Administration [I]                                    36   8.6331
## Police Action [I]                                            64  15.3477
## Possession of Illicit/Illegal Substances [II]                31   7.4341
## Possession of Weapons [I]                                     1   0.2398
## Sexual Assault Alleged [I]                                   35   8.3933
## Suicide Attempt [I]                                           4   0.9592
## Threat to Person [I]                                         29   6.9544
## Violence (Physical) [I]                                      25   5.9952
## Total                                                       417 100.0000

Trauma Data: whether or not Clients have any trauma history

ASUS Data

# ASUS: one row per client, taken from that client's latest assessment.
# Original analyst notes: median split; ASUS --> INVOLVEMENT1 & DISRUPTION1;
# LAST SIX MONTHS vars + Scale 9 ==> 8 vars.
# Rows are ordered by AssessmentYear first, so last() inside each client
# group picks the value from the most recent year on record.
latest_asus <- AsusData %>%
  arrange(AssessmentYear) %>%
  group_by(StudyClientId) %>%
  summarise(
    AodInvolvement = last(AodInvolvement1),
    Disruption = last(Disruption1),
    AodLastSixMonths = last(AodLastSixMonths),
    LegalNonConformingLastSixMonths = last(LegalNonConformingLastSixMonths),
    GlobalAodPsychosocialDistruptionAndProblems =
      last(GlobalAodPsychosocialDistruptionAndProblems),
    AssessmentYear = last(AssessmentYear),
    AssessmentType = last(AssessmentType)
  )

ClientAllData: merged ClientData with Risk / Incident / Alcohol&Drug / Criminal History & Trauma & Address datasets

# Merge all per-client tables onto the client roster.
# Starting from ClientDaTa, each lookup table is left-joined in turn on
# StudyClientId, so every client row is kept even when a lookup has no match.
# NOTE(review): the lookups are trimmed by column position ([, 1:2] etc.);
# this presumably keeps the key plus the measure(s) of interest -- confirm
# against the column order of each table.
ClientAllData <- Reduce(
  function(merged, lookup) left_join(merged, lookup, by = "StudyClientId"),
  list(
    latest_criminal_by_client[, 1:2],
    latest_drug_by_client[, 1:2],
    latest_risk_by_client[, 1:3],
    Incident_by_client[, 1:2],
    Trauma_by_client,
    # Collapse addresses to one row per client: the last row on file.
    AdressDaTa %>%
      group_by(StudyClientId) %>%
      summarise(
        AddressType = last(AddressType),
        City = last(City),
        State = last(State),
        Zip = last(Zip)
      ),
    # First six columns of latest_asus.
    latest_asus[, 1:6]
  ),
  ClientDaTa
)

# Replace every missing IncidentScore with 0
# (presumably NA means the client has no incident record -- confirm).
ClientAllData[["IncidentScore"]][is.na(ClientAllData[["IncidentScore"]])] <- 0

# Keep adult clients only: rows whose CurrentAge is at least 18.
# Rows with a missing CurrentAge are dropped by filter() as well.
ClientAllData <- ClientAllData %>%
  filter(CurrentAge >= 18)

Univariate Plots

library(ggplot2)

# Bar chart: number of clients at each CriminalHistory value.
# Rows with a missing CriminalHistory are removed before plotting; bars are
# ordered by the numeric value rather than alphabetically.
ggplot(
  data = na.omit(ClientAllData["CriminalHistory"]),
  mapping = aes(
    x = reorder(as.character(CriminalHistory), CriminalHistory),
    fill = as.character(CriminalHistory)
  )
) +
  geom_bar(na.rm = TRUE) +
  labs(title = "Clients with Criminal History", x = "Criminal History")

# Bar chart: number of clients at each AlcoholDrugProblems value.
# Rows with a missing AlcoholDrugProblems are removed before plotting; bars
# are ordered by the numeric value rather than alphabetically.
ggplot(
  data = na.omit(ClientAllData["AlcoholDrugProblems"]),
  mapping = aes(
    x = reorder(as.character(AlcoholDrugProblems), AlcoholDrugProblems),
    fill = as.character(AlcoholDrugProblems)
  )
) +
  geom_bar(na.rm = TRUE) +
  labs(
    title = "Clients with Alcohol and Drug Problems",
    x = "Alcohol and Drug Problems"
  )

# Bar chart: number of clients at each SuicideRisk_Total score.
# Rows with a missing score are removed before plotting; bars are ordered by
# the numeric score rather than alphabetically.
ggplot(
  data = na.omit(ClientAllData["SuicideRisk_Total"]),
  mapping = aes(
    x = reorder(as.character(SuicideRisk_Total), SuicideRisk_Total),
    fill = as.character(SuicideRisk_Total)
  )
) +
  geom_bar(na.rm = TRUE) +
  labs(title = "Risk of Suicide in Clients", x = "Suicide Risk")

# Bar chart: number of clients at each HomicideRisk_Total score.
# Rows with a missing score are removed before plotting; bars are ordered by
# the numeric score rather than alphabetically.
ggplot(
  data = na.omit(ClientAllData["HomicideRisk_Total"]),
  mapping = aes(
    x = reorder(as.character(HomicideRisk_Total), HomicideRisk_Total),
    fill = as.character(HomicideRisk_Total)
  )
) +
  geom_bar(na.rm = TRUE) +
  labs(title = "Homicidal Risk of Clients", x = "Homicide Risk")

# Kernel density of IncidentScore across all clients in ClientAllData.
ggplot(ClientAllData, aes(x = IncidentScore)) +
  geom_density() +
  labs(title = "Density Plot of Client Incidents")

# Bar chart: number of clients at each TraumaHistory value.
# Rows with a missing TraumaHistory are removed before plotting; bars are
# ordered by the numeric value rather than alphabetically.
ggplot(
  data = na.omit(ClientAllData["TraumaHistory"]),
  mapping = aes(
    x = reorder(as.character(TraumaHistory), TraumaHistory),
    fill = as.character(TraumaHistory)
  )
) +
  geom_bar(na.rm = TRUE) +
  labs(title = "Clients Assessed for Trauma", x = "Trauma")

require(tidyverse)
# Client counts for the four most common cities.
# Clients with a missing City are removed *before* ranking. The previous
# version took the top 5 and then dropped the NA row, which only produced
# four cities when the NA group happened to rank in the top five.
# top_n() keeps ties, so more than four rows can be returned when several
# cities share the fourth-highest count.
top4cities <- ClientAllData %>%
  filter(!is.na(City)) %>%
  group_by(City) %>%
  summarise(count = n()) %>%
  top_n(n = 4, wt = count)

# One bar per city, bar height = number of clients.
ggplot(data = top4cities, aes(x = City, y = count, fill = City)) +
  geom_col() +
  ggtitle("Clients by Top 4 Cities")

Regression Analyses

# Multiple regression, model 1: IncidentScore on the five client risk/history
# measures. lm() drops rows with a missing value in any model variable.
my.lm1 <- lm(
  formula = IncidentScore ~ CriminalHistory + AlcoholDrugProblems +
    SuicideRisk_Total + HomicideRisk_Total + TraumaHistory,
  data = ClientAllData
)
summary(my.lm1)
## 
## Call:
## lm(formula = IncidentScore ~ CriminalHistory + AlcoholDrugProblems + 
##     SuicideRisk_Total + HomicideRisk_Total + TraumaHistory, data = ClientAllData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.0496 -0.4449 -0.3659 -0.2689 15.4116 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          0.21776    0.13902   1.566    0.118    
## CriminalHistory      0.03245    0.02198   1.476    0.140    
## AlcoholDrugProblems -0.01581    0.02469  -0.640    0.522    
## SuicideRisk_Total    0.25670    0.06162   4.166 3.42e-05 ***
## HomicideRisk_Total  -0.14014    0.15168  -0.924    0.356    
## TraumaHistory       -0.01669    0.10096  -0.165    0.869    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.438 on 833 degrees of freedom
##   (2745 observations deleted due to missingness)
## Multiple R-squared:  0.02313,    Adjusted R-squared:  0.01727 
## F-statistic: 3.945 on 5 and 833 DF,  p-value: 0.001529
# Model 2: model 1 plus demographics -- age as a numeric term, marital status
# and race as categorical (factor) terms.
my.lm2 <- lm(
  formula = IncidentScore ~ CriminalHistory + AlcoholDrugProblems +
    SuicideRisk_Total + HomicideRisk_Total + TraumaHistory +
    CurrentAge + as.factor(MaritalStatus) + as.factor(Race),
  data = ClientAllData
)
summary(my.lm2)
## 
## Call:
## lm(formula = IncidentScore ~ CriminalHistory + AlcoholDrugProblems + 
##     SuicideRisk_Total + HomicideRisk_Total + TraumaHistory + 
##     CurrentAge + as.factor(MaritalStatus) + as.factor(Race), 
##     data = ClientAllData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.2387 -0.4964 -0.3571 -0.2083 15.2343 
## 
## Coefficients:
##                                                   Estimate Std. Error t value
## (Intercept)                                      -0.333225   1.191227  -0.280
## CriminalHistory                                   0.033498   0.022654   1.479
## AlcoholDrugProblems                              -0.022824   0.025781  -0.885
## SuicideRisk_Total                                 0.260025   0.063007   4.127
## HomicideRisk_Total                               -0.163307   0.153426  -1.064
## TraumaHistory                                    -0.016929   0.102674  -0.165
## CurrentAge                                        0.011144   0.004973   2.241
## as.factor(MaritalStatus)Civil Union              -0.342551   1.562909  -0.219
## as.factor(MaritalStatus)Divorced/Annulled        -0.107220   1.197983  -0.090
## as.factor(MaritalStatus)Legally separated         0.083069   1.251447   0.066
## as.factor(MaritalStatus)Married                   0.018202   1.198301   0.015
## as.factor(MaritalStatus)Not Specified             0.037473   1.204298   0.031
## as.factor(MaritalStatus)Single/Never Married      0.091131   1.182061   0.077
## as.factor(MaritalStatus)Widow/widower             0.629448   1.329528   0.473
## as.factor(Race)American Indian or Alaskan Native  0.410987   0.552294   0.744
## as.factor(Race)Asian                             -0.516238   1.044583  -0.494
## as.factor(Race)Caucasian or White                 0.063367   0.129017   0.491
## as.factor(Race)Multi-Racial                      -0.012667   0.221008  -0.057
## as.factor(Race)Not on file                       -0.272445   1.185920  -0.230
## as.factor(Race)Some other race                    0.002278   0.132248   0.017
## as.factor(Race)Undisclosed                       -0.715118   0.843170  -0.848
##                                                  Pr(>|t|)    
## (Intercept)                                        0.7798    
## CriminalHistory                                    0.1396    
## AlcoholDrugProblems                                0.3763    
## SuicideRisk_Total                                4.05e-05 ***
## HomicideRisk_Total                                 0.2875    
## TraumaHistory                                      0.8691    
## CurrentAge                                         0.0253 *  
## as.factor(MaritalStatus)Civil Union                0.8266    
## as.factor(MaritalStatus)Divorced/Annulled          0.9287    
## as.factor(MaritalStatus)Legally separated          0.9471    
## as.factor(MaritalStatus)Married                    0.9879    
## as.factor(MaritalStatus)Not Specified              0.9752    
## as.factor(MaritalStatus)Single/Never Married       0.9386    
## as.factor(MaritalStatus)Widow/widower              0.6360    
## as.factor(Race)American Indian or Alaskan Native   0.4570    
## as.factor(Race)Asian                               0.6213    
## as.factor(Race)Caucasian or White                  0.6235    
## as.factor(Race)Multi-Racial                        0.9543    
## as.factor(Race)Not on file                         0.8184    
## as.factor(Race)Some other race                     0.9863    
## as.factor(Race)Undisclosed                         0.3966    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.443 on 818 degrees of freedom
##   (2745 observations deleted due to missingness)
## Multiple R-squared:  0.03366,    Adjusted R-squared:  0.01004 
## F-statistic: 1.425 on 20 and 818 DF,  p-value: 0.1021
# Model 3: suicide risk and criminal history, plus the interaction between
# suicide risk and alcohol/drug problems. The `*` operator expands to both
# main effects and their product term.
my.lm3 <- lm(
  formula = IncidentScore ~ SuicideRisk_Total + CriminalHistory +
    SuicideRisk_Total * AlcoholDrugProblems,
  data = ClientAllData
)
summary(my.lm3)
## 
## Call:
## lm(formula = IncidentScore ~ SuicideRisk_Total + CriminalHistory + 
##     SuicideRisk_Total * AlcoholDrugProblems, data = ClientAllData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3771 -0.5345 -0.4358 -0.2884 15.2003 
## 
## Coefficients:
##                                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                            0.18975    0.12130   1.564   0.1179    
## SuicideRisk_Total                      0.48201    0.08178   5.894 4.59e-09 ***
## CriminalHistory                        0.04922    0.01988   2.476   0.0134 *  
## AlcoholDrugProblems                   -0.02451    0.02343  -1.046   0.2958    
## SuicideRisk_Total:AlcoholDrugProblems -0.03455    0.02075  -1.665   0.0961 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.763 on 1606 degrees of freedom
##   (1973 observations deleted due to missingness)
## Multiple R-squared:  0.04228,    Adjusted R-squared:  0.0399 
## F-statistic: 17.73 on 4 and 1606 DF,  p-value: 2.997e-14

Latent Class Analysis

Categorize variables for latent class analysis

  • RISK data: Suicide_class / Homicide_class ( low - medium - high)

    • SuicideRisk –> scale 0-8, <3 is low, 3-5 is medium, 6-8 is high

    • HomicideRisk -> scale 0-7, <2 is low, 2-3 is medium, > 3 is high

  • ASUS data: Aod1_class / Aod2_class (last 6 mos) / Disruption_class / LegalNonConform_class / LifeFunctionality_class ( low - medium - high)

  • Other: Trauma_class (yes - no/unknown) / age_group (level: 18-30 / 31-50 / >50)

# Derive the categorical indicators used by the latent class analysis.
#
# Fixes relative to the previous version:
#   * Homicide_class is now computed from HomicideRisk_Total; it was
#     mistakenly computed from SuicideRisk_Total.
#   * The middle bands use ordered `<=` tests instead of `%in% a:b`, so a
#     non-integer value (e.g. an age of 30.5) is no longer left unclassified.
#     case_when() uses the first matching condition, so integer inputs get
#     exactly the same class as before.
ClientAllData <- ClientAllData %>%
  mutate(
    # Age groups: 18-30, 31-50 and > 50 (under-18s were filtered out earlier).
    age_group = case_when(
      CurrentAge <= 30 ~ "18-30",
      CurrentAge <= 50 ~ "31-50",
      CurrentAge > 50 ~ ">50"
    ),
    # ASUS scales are split at their own 25th and 75th percentiles:
    # bottom quarter = low, middle half = medium, top quarter = high.
    # cut() intervals are closed on the right, so a value equal to a
    # percentile falls in the lower class. Missing values stay NA.
    Aod1_class = cut(
      AodInvolvement,
      c(-Inf, quantile(AodInvolvement, c(.25, .75), na.rm = TRUE), Inf),
      labels = c("low", "medium", "high")
    ),
    Aod2_class = cut(
      AodLastSixMonths,
      c(-Inf, quantile(AodLastSixMonths, c(.25, .75), na.rm = TRUE), Inf),
      labels = c("low", "medium", "high")
    ),
    Disruption_class = cut(
      Disruption,
      c(-Inf, quantile(Disruption, c(.25, .75), na.rm = TRUE), Inf),
      labels = c("low", "medium", "high")
    ),
    LegalNonConform_class = cut(
      LegalNonConformingLastSixMonths,
      c(
        -Inf,
        quantile(LegalNonConformingLastSixMonths, c(.25, .75), na.rm = TRUE),
        Inf
      ),
      labels = c("low", "medium", "high")
    ),
    LifeFunctionality_class = cut(
      GlobalAodPsychosocialDistruptionAndProblems,
      c(
        -Inf,
        quantile(
          GlobalAodPsychosocialDistruptionAndProblems,
          c(.25, .75),
          na.rm = TRUE
        ),
        Inf
      ),
      labels = c("low", "medium", "high")
    ),
    # Suicide risk: < 3 low, 3-5 medium, > 5 high.
    Suicide_class = case_when(
      SuicideRisk_Total < 3 ~ "low",
      SuicideRisk_Total <= 5 ~ "medium",
      SuicideRisk_Total > 5 ~ "high"
    ),
    # Homicide risk: < 2 low, 2-3 medium, > 3 high.
    Homicide_class = case_when(
      HomicideRisk_Total < 2 ~ "low",
      HomicideRisk_Total <= 3 ~ "medium",
      HomicideRisk_Total > 3 ~ "high"
    ),
    # Trauma: "yes" when TraumaHistory is 1, otherwise "no/unknown".
    # A missing TraumaHistory yields NA here.
    Trauma_class = ifelse(TraumaHistory == 1, "yes", "no/unknown")
  )

# Clients whose Trauma_class is still missing are counted as "no/unknown".
ClientAllData$Trauma_class <- ifelse(
  is.na(ClientAllData$Trauma_class),
  "no/unknown",
  ClientAllData$Trauma_class
)