This is the total combined dataset, containing both the Docket cases and the OSS search results. Of the docket cases, 397 were reviewed; of these, 176 were included.

In total we analyzed 767 cases. This includes the Westlaw and Lexis OSS.

library(readxl)
# Load the combined jail-deaths workbook; cells holding the text "NA" are
# read in as missing values.
JD <- read_excel(
  path = "C:/Users/telsabawi/Dropbox/1-Research/1- GW BOP White Paper/Datasets/combined_Jail_deaths_data_3.20.21.xlsx",
  na = "NA"
)

Subset Data

I have divided the dataset into two samples: OSS and DOC. OSS has 214 observations and was created from the summary sheets of the Verdicts & Settlements databases of both Lexis Nexis and Westlaw. DOC was extracted from a sample of dockets in Lexis Nexis and has 159 observations. The goal here is to test whether there is any statistically significant difference between the two samples. The concern was that the Verdicts & Settlements databases may give skewed results, since they comprise case outcomes that were reported to publications (by attorneys or the media), whereas the docket data represent the population of cases included in Lexis Nexis’ docket search that fit the inclusion criteria. We are therefore testing whether the Verdicts & Settlements databases gave us results that differ, on the variables analyzed, from those of the Dockets search.

# Split the combined sheet by row position: rows 1-214 are the OSS
# (Verdicts & Settlements) cases and rows 215-374 are the docket (DOC) cases.
# NOTE(review): 215:374 selects 160 rows, while the accompanying text reports
# 159 DOC observations -- confirm which figure is correct.
OSS <- JD[1:214, ]
DOC <- JD[215:374, ]

# Tag every row with the sample it came from ("1" = OSS, "2" = DOC).
OSS$Src <- "1"
DOC$Src <- "2"

# Stack the two samples back together row-wise (rbind appends rows; it does
# not join columns side by side).
JD2 <- rbind(OSS, DOC)
library(knitr)
library(kableExtra)
library(MASS)
library(mlogit)
# ---- Formatting all variables ----

# Two-level indicators are relabelled "No"/"Yes". factor() assigns `labels`
# positionally to the sorted distinct values (names on the labels vector are
# ignored), so the lower code becomes "No" and the higher code "Yes". Each
# column listed here must hold exactly two distinct non-missing codes,
# otherwise factor() stops with an "invalid 'labels'" error.
# Each column is converted exactly once.
yes_no_vars <- c(
  "OFC_SUE", "HEA_SUE", "PHYS", "City_Sue", "Jail_Sue", "Officer_Sue",
  "MENTAL", "SU", "Withdraw", "Suic_Idea"
)
JD2[yes_no_vars] <- lapply(JD2[yes_no_vars], factor, labels = c("No", "Yes"))

# Two-level variables with their own labels, again applied in sorted-code
# order (1 then 2).
JD2$CRT <- factor(JD2$CRT, labels = c("Federal", "State"))
JD2$Src <- factor(JD2$Src, labels = c("OSS", "Doc"))

# Remaining categorical variables keep their raw codes as factor levels.
category_vars <- c("OUTCOME", "STATE", "COUNTY_CIT", "PREX", "COD_CAT")
JD2[category_vars] <- lapply(JD2[category_vars], as.factor)

# Numeric variables.
JD2$AGE <- as.numeric(JD2$AGE)
JD2$TTL <- as.numeric(JD2$TTL)

Court

# Logistic regression of court type (CRT) on data source (Src). The SrcDoc
# coefficient compares the docket sample against the OSS reference level.
L1 <- glm(
  formula = CRT ~ Src,
  family = "binomial",
  data = JD2
)
summary(L1)
## 
## Call:
## glm(formula = CRT ~ Src, family = "binomial", data = JD2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.6085  -0.6085  -0.5323  -0.5323   2.0122  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.5926     0.1828  -8.711   <2e-16 ***
## SrcDoc       -0.2901     0.2971  -0.976    0.329    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 318.63  on 371  degrees of freedom
## Residual deviance: 317.66  on 370  degrees of freedom
##   (2 observations deleted due to missingness)
## AIC: 321.66
## 
## Number of Fisher Scoring iterations: 4

Law Enforcement Officials in Official Capacity

# Logistic regression of the OFC_SUE indicator on data source (Src), with
# OSS as the reference level.
L2 <- glm(
  formula = OFC_SUE ~ Src,
  family = "binomial",
  data = JD2
)
summary(L2)
## 
## Call:
## glm(formula = OFC_SUE ~ Src, family = "binomial", data = JD2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.4324  -1.1398   0.9423   0.9423   1.2155  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.5819     0.1432   4.063 4.84e-05 ***
## SrcDoc       -0.6712     0.2146  -3.128  0.00176 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 503.90  on 368  degrees of freedom
## Residual deviance: 494.01  on 367  degrees of freedom
##   (5 observations deleted due to missingness)
## AIC: 498.01
## 
## Number of Fisher Scoring iterations: 4

Law Enforcement Officers Personally

# Logistic regression of the Officer_Sue indicator on data source (Src).
# This section previously refitted OFC_SUE ~ Src, duplicating model L2 from
# the official-capacity section; Officer_Sue is presumably the
# officers-sued-personally indicator -- confirm against the codebook.
# NOTE(review): the printed output that follows in this document was produced
# by the OFC_SUE model and must be regenerated after this change.
L3 <- glm(
  formula = Officer_Sue ~ Src,
  family = "binomial",
  data = JD2
)
summary(L3)
## 
## Call:
## glm(formula = OFC_SUE ~ Src, family = "binomial", data = JD2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.4324  -1.1398   0.9423   0.9423   1.2155  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.5819     0.1432   4.063 4.84e-05 ***
## SrcDoc       -0.6712     0.2146  -3.128  0.00176 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 503.90  on 368  degrees of freedom
## Residual deviance: 494.01  on 367  degrees of freedom
##   (5 observations deleted due to missingness)
## AIC: 498.01
## 
## Number of Fisher Scoring iterations: 4

Jails

# Logistic regression of the Jail_Sue indicator on data source (Src), with
# OSS as the reference level.
L4 <- glm(
  formula = Jail_Sue ~ Src,
  family = "binomial",
  data = JD2
)
summary(L4)
## 
## Call:
## glm(formula = Jail_Sue ~ Src, family = "binomial", data = JD2)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.235  -1.190   1.121   1.165   1.165  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  0.03031    0.17410   0.174    0.862
## SrcDoc       0.10323    0.31189   0.331    0.741
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 265.98  on 191  degrees of freedom
## Residual deviance: 265.87  on 190  degrees of freedom
##   (182 observations deleted due to missingness)
## AIC: 269.87
## 
## Number of Fisher Scoring iterations: 3

Counties or Cities

# Logistic regression of the City_Sue indicator on data source (Src), with
# OSS as the reference level.
# Stored under its own name so it no longer overwrites the Jail_Sue model
# already held in L4.
L4_city <- glm(
  formula = City_Sue ~ Src,
  family = "binomial",
  data = JD2
)
summary(L4_city)
## 
## Call:
## glm(formula = City_Sue ~ Src, family = "binomial", data = JD2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7022  -1.5518   0.7317   0.7317   0.8446  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   1.1811     0.2053   5.752  8.8e-09 ***
## SrcDoc       -0.3338     0.3486  -0.958    0.338    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 218.11  on 191  degrees of freedom
## Residual deviance: 217.20  on 190  degrees of freedom
##   (182 observations deleted due to missingness)
## AIC: 221.2
## 
## Number of Fisher Scoring iterations: 4

Healthcare

# Logistic regression of the HEA_SUE indicator on data source (Src), with
# OSS as the reference level.
L5 <- glm(
  formula = HEA_SUE ~ Src,
  family = "binomial",
  data = JD2
)
summary(L5)
## 
## Call:
## glm(formula = HEA_SUE ~ Src, family = "binomial", data = JD2)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.272  -1.272   1.086   1.086   1.150  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)   0.2189     0.1385   1.580    0.114
## SrcDoc       -0.1552     0.2114  -0.734    0.463
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 508.02  on 367  degrees of freedom
## Residual deviance: 507.48  on 366  degrees of freedom
##   (6 observations deleted due to missingness)
## AIC: 511.48
## 
## Number of Fisher Scoring iterations: 3

Outcomes

# Logistic regression of case outcome (OUTCOME) on data source (Src).
# NOTE(review): OUTCOME is a plain as.factor() of the raw codes; with a factor
# response, a binomial glm treats the first level as failure and all other
# levels as success -- confirm OUTCOME has exactly two levels.
summary(
  glm(
    formula = OUTCOME ~ Src,
    family = "binomial",
    data = JD2
  )
)
## 
## Call:
## glm(formula = OUTCOME ~ Src, family = "binomial", data = JD2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4478   0.3203   0.3203   0.7515   0.7515  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   2.9444     0.3420   8.610  < 2e-16 ***
## SrcDoc       -1.8245     0.3997  -4.565 4.99e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 237.30  on 305  degrees of freedom
## Residual deviance: 212.06  on 304  degrees of freedom
##   (68 observations deleted due to missingness)
## AIC: 216.06
## 
## Number of Fisher Scoring iterations: 5

#TTL

# Linear regression of TTL on data source (Src); the SrcDoc coefficient is
# the difference in mean TTL between the docket and OSS samples.
summary(
  lm(
    formula = TTL ~ Src,
    data = JD2
  )
)
## 
## Call:
## lm(formula = TTL ~ Src, data = JD2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1540383 -1334883  -563471  -195971 13459617 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1540383     214642   7.177 1.11e-11 ***
## SrcDoc       -976913     347365  -2.812  0.00537 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2503000 on 218 degrees of freedom
##   (154 observations deleted due to missingness)
## Multiple R-squared:  0.03501,    Adjusted R-squared:  0.03058 
## F-statistic: 7.909 on 1 and 218 DF,  p-value: 0.005367

Decedent Demographics:

Age

# Linear regression of AGE on data source (Src); the SrcDoc coefficient is
# the difference in mean age between the docket and OSS samples.
summary(
  lm(
    formula = AGE ~ Src,
    data = JD2
  )
)
## 
## Call:
## lm(formula = AGE ~ Src, data = JD2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.058 -10.058  -2.648  10.942  39.942 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  37.0579     0.8819  42.022   <2e-16 ***
## SrcDoc        0.5900     1.6908   0.349    0.727    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.16 on 259 degrees of freedom
##   (113 observations deleted due to missingness)
## Multiple R-squared:  0.0004699,  Adjusted R-squared:  -0.003389 
## F-statistic: 0.1218 on 1 and 259 DF,  p-value: 0.7274

Cause of Death

Nominal categorical dependent variable

# COD_CAT is described as a nominal categorical variable. A binomial glm()
# with a factor response treats the first level as "failure" and every other
# level as "success", so it only contrasts the first category with all the
# rest. A chi-squared test of independence instead compares the whole
# distribution of categories between the two sources. table() drops rows
# with a missing value in either variable.
# NOTE(review): the printed glm output that follows in this document predates
# this change and must be regenerated. If chisq.test() warns about small
# expected counts, consider fisher.test() or merging sparse categories.
chisq.test(table(JD2$COD_CAT, JD2$Src))
## 
## Call:
## glm(formula = COD_CAT ~ Src, family = "binomial", data = JD2)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -3.275   0.097   0.097   0.355   0.355  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    5.357      1.002   5.344 9.09e-08 ***
## SrcDoc        -2.624      1.067  -2.460   0.0139 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 83.344  on 343  degrees of freedom
## Residual deviance: 72.951  on 342  degrees of freedom
##   (30 observations deleted due to missingness)
## AIC: 76.951
## 
## Number of Fisher Scoring iterations: 8

Evidence of Mental Illness Present

# Logistic regression of the MENTAL indicator on data source (Src), with OSS
# as the reference level.
summary(
  glm(
    formula = MENTAL ~ Src,
    family = "binomial",
    data = JD2
  )
)
## 
## Call:
## glm(formula = MENTAL ~ Src, family = "binomial", data = JD2)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.034  -1.022  -1.022   1.342   1.342  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept) -0.37828    0.13917  -2.718  0.00657 **
## SrcDoc       0.02997    0.23432   0.128  0.89822   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 446.51  on 329  degrees of freedom
## Residual deviance: 446.49  on 328  degrees of freedom
##   (44 observations deleted due to missingness)
## AIC: 450.49
## 
## Number of Fisher Scoring iterations: 4

Evidence of Substance Use Issue Present

# Logistic regression of the SU indicator on data source (Src), with OSS as
# the reference level.
summary(
  glm(
    formula = SU ~ Src,
    family = "binomial",
    data = JD2
  )
)
## 
## Call:
## glm(formula = SU ~ Src, family = "binomial", data = JD2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.0447  -1.0447  -0.4968   1.3162   2.0754  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.3205     0.1385  -2.314   0.0207 *  
## SrcDoc       -1.7097     0.3259  -5.246 1.55e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 406.70  on 325  degrees of freedom
## Residual deviance: 371.66  on 324  degrees of freedom
##   (48 observations deleted due to missingness)
## AIC: 375.66
## 
## Number of Fisher Scoring iterations: 4

Evidence of Physical Health Issue Present

# Logistic regression of the PHYS indicator on data source (Src), with OSS
# as the reference level.
summary(
  glm(
    formula = PHYS ~ Src,
    family = "binomial",
    data = JD2
  )
)
## 
## Call:
## glm(formula = PHYS ~ Src, family = "binomial", data = JD2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1037  -0.8351  -0.8351   1.2530   1.5639  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.8741     0.1500  -5.828  5.6e-09 ***
## SrcDoc        0.6983     0.2405   2.903   0.0037 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 424.97  on 327  degrees of freedom
## Residual deviance: 416.54  on 326  degrees of freedom
##   (46 observations deleted due to missingness)
## AIC: 420.54
## 
## Number of Fisher Scoring iterations: 4

Evidence of Withdrawal Symptoms

# Logistic regression of the Withdraw indicator on data source (Src), with
# OSS as the reference level.
summary(
  glm(
    formula = Withdraw ~ Src,
    family = "binomial",
    data = JD2
  )
)
## 
## Call:
## glm(formula = Withdraw ~ Src, family = "binomial", data = JD2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.9830  -0.9830  -0.3609   1.3851   2.3510  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.4761     0.1406  -3.386  0.00071 ***
## SrcDoc       -2.2224     0.4150  -5.355 8.56e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 381.58  on 324  degrees of freedom
## Residual deviance: 337.12  on 323  degrees of freedom
##   (49 observations deleted due to missingness)
## AIC: 341.12
## 
## Number of Fisher Scoring iterations: 5