This is the total combined dataset, containing both the docket cases and the OSS search results. Of the docket cases, 397 were reviewed; of these, 176 were included.
In total, we analyzed 767 cases. This includes the cases from the Westlaw and Lexis OSS searches.
library(readxl)
# Read the combined jail-deaths workbook into JD; cells holding the literal
# text "NA" are imported as missing values.
JD <- read_excel(
  path = "C:/Users/telsabawi/Dropbox/1-Research/1- GW BOP White Paper/Datasets/combined_Jail_deaths_data_3.20.21.xlsx",
  na = "NA"
)
I have divided the dataset into two samples: OSS and DOC. OSS has 214 observations and was created from the summary sheets of the Verdicts & Settlements databases of both Lexis Nexis and Westlaw. DOC consists of data extracted from a sample of dockets in Lexis Nexis and has 159 observations. The goal here is to test whether there is any statistically significant difference between the two samples. The concern was that the Verdicts & Settlements databases may give skewed results, because they consist of case outcomes that were reported to publications (by attorneys or the media), whereas the docket sample is drawn from the population of cases in Lexis Nexis’ docket search that fit the inclusion criteria. We are therefore testing whether the Verdicts & Settlements databases gave us results that differ, on the variables analyzed, from those of the docket search.
# Split the combined data by row position into the two source samples.
# NOTE(review): 215:374 selects 160 rows, while the narrative above reports
# 159 DOC observations -- confirm the intended row range against the workbook.
OSS <- JD[1:214, ]
DOC <- JD[215:374, ]

# Tag every row with its source sample: "1" = OSS, "2" = DOC.
OSS$Src <- "1"
DOC$Src <- "2"

# Stack the two samples row-wise (rbind appends rows, not columns).
JD2 <- rbind(OSS, DOC)
library(knitr)
library(kableExtra)
library(MASS)
library(mlogit)
######################## Formatting All Variables

# Recode a coded column as a labelled factor with the code -> label mapping
# pinned explicitly.
#
# factor() ignores the *names* of its `labels` vector and pairs the labels with
# the sorted distinct values purely by position, so writing
# labels = c(`0` = "No", `1` = "Yes") does not by itself guarantee that code 0
# becomes "No". Passing `levels` fixes the mapping, and the guard stops with an
# error when a column holds a code outside the expected set instead of
# mislabelling it silently.
#
# x      : vector of raw codes (numeric or character); NA is kept as NA.
# column : column name, used only in the error message.
# codes  : the expected raw codes, in the order matching `labels`.
# labels : the labels to attach to `codes`.
label_codes <- function(x, column, codes, labels) {
  observed <- unique(as.character(x[!is.na(x)]))
  unexpected <- setdiff(observed, as.character(codes))
  if (length(unexpected) > 0) {
    stop(
      "Unexpected code(s) in ", column, ": ",
      paste(unexpected, collapse = ", "),
      call. = FALSE
    )
  }
  factor(x, levels = codes, labels = labels)
}

# Indicators coded 0 = "No", 1 = "Yes". PHYS is listed once; it was previously
# recoded twice, and the second pass re-labelled an already labelled factor.
yes_no_vars <- c(
  "OFC_SUE", "HEA_SUE", "PHYS", "City_Sue", "Jail_Sue", "Officer_Sue",
  "MENTAL", "SU", "Withdraw", "Suic_Idea"
)
JD2[yes_no_vars] <- lapply(
  yes_no_vars,
  function(v) label_codes(JD2[[v]], v, codes = c(0, 1), labels = c("No", "Yes"))
)

# Court type: 1 = Federal, 2 = State.
JD2$CRT <- label_codes(
  JD2$CRT, "CRT",
  codes = c(1, 2), labels = c("Federal", "State")
)

# Data source: "1" = OSS, "2" = Doc. OSS is the first (reference) level.
JD2$Src <- label_codes(
  JD2$Src, "Src",
  codes = c("1", "2"), labels = c("OSS", "Doc")
)

# Categorical variables kept with their observed values as levels.
JD2$OUTCOME <- as.factor(JD2$OUTCOME)
JD2$STATE <- as.factor(JD2$STATE)
JD2$COUNTY_CIT <- as.factor(JD2$COUNTY_CIT)
JD2$PREX <- as.factor(JD2$PREX)
JD2$COD_CAT <- as.factor(JD2$COD_CAT)

# Continuous variables.
JD2$AGE <- as.numeric(JD2$AGE)
JD2$TTL <- as.numeric(JD2$TTL)

#loop via lapply multiple variables to factor
#names <- c(19:33)
#JD2[,names] <- lapply(JD2[,names] , factor)
# Source comparison for court type: logistic regression of CRT on Src.
# The SrcDoc coefficient is the Doc-vs-OSS difference in log-odds.
L1 <- glm(
  formula = CRT ~ Src,
  family = "binomial",
  data = JD2
)
summary(L1)
##
## Call:
## glm(formula = CRT ~ Src, family = "binomial", data = JD2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.6085 -0.6085 -0.5323 -0.5323 2.0122
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.5926 0.1828 -8.711 <2e-16 ***
## SrcDoc -0.2901 0.2971 -0.976 0.329
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 318.63 on 371 degrees of freedom
## Residual deviance: 317.66 on 370 degrees of freedom
## (2 observations deleted due to missingness)
## AIC: 321.66
##
## Number of Fisher Scoring iterations: 4
# Source comparison for OFC_SUE (No/Yes): logistic regression on Src.
L2 <- glm(
  formula = OFC_SUE ~ Src,
  family = "binomial",
  data = JD2
)
summary(L2)
##
## Call:
## glm(formula = OFC_SUE ~ Src, family = "binomial", data = JD2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.4324 -1.1398 0.9423 0.9423 1.2155
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.5819 0.1432 4.063 4.84e-05 ***
## SrcDoc -0.6712 0.2146 -3.128 0.00176 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 503.90 on 368 degrees of freedom
## Residual deviance: 494.01 on 367 degrees of freedom
## (5 observations deleted due to missingness)
## AIC: 498.01
##
## Number of Fisher Scoring iterations: 4
# Source comparison for Officer_Sue (No/Yes): logistic regression on Src.
# The previous version refit OFC_SUE ~ Src, an exact duplicate of model L2,
# while Officer_Sue -- recoded alongside City_Sue and Jail_Sue -- was never
# tested.
# NOTE(review): confirm Officer_Sue is the intended outcome; the summary output
# echoed below still shows the duplicated OFC_SUE fit and must be regenerated.
L3 <- glm(Officer_Sue ~ Src, data = JD2, family = "binomial")
summary(L3)
##
## Call:
## glm(formula = OFC_SUE ~ Src, family = "binomial", data = JD2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.4324 -1.1398 0.9423 0.9423 1.2155
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.5819 0.1432 4.063 4.84e-05 ***
## SrcDoc -0.6712 0.2146 -3.128 0.00176 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 503.90 on 368 degrees of freedom
## Residual deviance: 494.01 on 367 degrees of freedom
## (5 observations deleted due to missingness)
## AIC: 498.01
##
## Number of Fisher Scoring iterations: 4
# Source comparison for Jail_Sue (No/Yes): logistic regression on Src.
# Stored under its own name because L4 is reassigned to the City_Sue model
# further down, which would otherwise discard this fit.
L4_jail <- glm(Jail_Sue ~ Src, data = JD2, family = "binomial")
summary(L4_jail)
##
## Call:
## glm(formula = Jail_Sue ~ Src, family = "binomial", data = JD2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.235 -1.190 1.121 1.165 1.165
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.03031 0.17410 0.174 0.862
## SrcDoc 0.10323 0.31189 0.331 0.741
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 265.98 on 191 degrees of freedom
## Residual deviance: 265.87 on 190 degrees of freedom
## (182 observations deleted due to missingness)
## AIC: 269.87
##
## Number of Fisher Scoring iterations: 3
# Source comparison for City_Sue (No/Yes): logistic regression on Src.
L4 <- glm(
  formula = City_Sue ~ Src,
  family = "binomial",
  data = JD2
)
summary(L4)
##
## Call:
## glm(formula = City_Sue ~ Src, family = "binomial", data = JD2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7022 -1.5518 0.7317 0.7317 0.8446
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.1811 0.2053 5.752 8.8e-09 ***
## SrcDoc -0.3338 0.3486 -0.958 0.338
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 218.11 on 191 degrees of freedom
## Residual deviance: 217.20 on 190 degrees of freedom
## (182 observations deleted due to missingness)
## AIC: 221.2
##
## Number of Fisher Scoring iterations: 4
# Source comparison for HEA_SUE (No/Yes): logistic regression on Src.
L5 <- glm(
  formula = HEA_SUE ~ Src,
  family = "binomial",
  data = JD2
)
summary(L5)
##
## Call:
## glm(formula = HEA_SUE ~ Src, family = "binomial", data = JD2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.272 -1.272 1.086 1.086 1.150
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.2189 0.1385 1.580 0.114
## SrcDoc -0.1552 0.2114 -0.734 0.463
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 508.02 on 367 degrees of freedom
## Residual deviance: 507.48 on 366 degrees of freedom
## (6 observations deleted due to missingness)
## AIC: 511.48
##
## Number of Fisher Scoring iterations: 3
# Source comparison for OUTCOME: logistic regression on Src, summarised
# directly without storing the fit.
# NOTE(review): OUTCOME is a plain as.factor() conversion; with a binomial
# family a factor response is modelled as first level vs. all other levels --
# confirm OUTCOME has exactly two levels.
summary(
  glm(
    formula = OUTCOME ~ Src,
    family = "binomial",
    data = JD2
  )
)
##
## Call:
## glm(formula = OUTCOME ~ Src, family = "binomial", data = JD2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4478 0.3203 0.3203 0.7515 0.7515
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.9444 0.3420 8.610 < 2e-16 ***
## SrcDoc -1.8245 0.3997 -4.565 4.99e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 237.30 on 305 degrees of freedom
## Residual deviance: 212.06 on 304 degrees of freedom
## (68 observations deleted due to missingness)
## AIC: 216.06
##
## Number of Fisher Scoring iterations: 5
# TTL: linear regression of TTL on Src; the SrcDoc coefficient is the
# difference in mean TTL between the Doc and OSS samples.
summary(
  lm(
    formula = TTL ~ Src,
    data = JD2
  )
)
##
## Call:
## lm(formula = TTL ~ Src, data = JD2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1540383 -1334883 -563471 -195971 13459617
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1540383 214642 7.177 1.11e-11 ***
## SrcDoc -976913 347365 -2.812 0.00537 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2503000 on 218 degrees of freedom
## (154 observations deleted due to missingness)
## Multiple R-squared: 0.03501, Adjusted R-squared: 0.03058
## F-statistic: 7.909 on 1 and 218 DF, p-value: 0.005367
Age
# AGE: linear regression of AGE on Src; the SrcDoc coefficient is the
# difference in mean AGE between the Doc and OSS samples.
summary(
  lm(
    formula = AGE ~ Src,
    data = JD2
  )
)
##
## Call:
## lm(formula = AGE ~ Src, data = JD2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.058 -10.058 -2.648 10.942 39.942
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.0579 0.8819 42.022 <2e-16 ***
## SrcDoc 0.5900 1.6908 0.349 0.727
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.16 on 259 degrees of freedom
## (113 observations deleted due to missingness)
## Multiple R-squared: 0.0004699, Adjusted R-squared: -0.003389
## F-statistic: 0.1218 on 1 and 259 DF, p-value: 0.7274
Nominal categorical dependent variable
# COD_CAT is a nominal factor (built with as.factor(), not a No/Yes recode).
# glm(..., family = "binomial") on a factor response treats the first level as
# "failure" and every other level as "success", so the earlier binomial fit
# only compared the first category against all the rest. A chi-squared test of
# independence on the COD_CAT-by-Src table tests whether the whole category
# distribution differs between the two sources. Rows with a missing COD_CAT
# are dropped by table().
# NOTE(review): the glm output echoed below predates this change and must be
# regenerated.
# NOTE(review): if chisq.test() warns about small expected counts, confirm the
# result with fisher.test(cod_by_src, simulate.p.value = TRUE).
cod_by_src <- table(COD_CAT = JD2$COD_CAT, Src = JD2$Src)
cod_by_src
chisq.test(cod_by_src)
##
## Call:
## glm(formula = COD_CAT ~ Src, family = "binomial", data = JD2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.275 0.097 0.097 0.355 0.355
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.357 1.002 5.344 9.09e-08 ***
## SrcDoc -2.624 1.067 -2.460 0.0139 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 83.344 on 343 degrees of freedom
## Residual deviance: 72.951 on 342 degrees of freedom
## (30 observations deleted due to missingness)
## AIC: 76.951
##
## Number of Fisher Scoring iterations: 8
# Source comparison for MENTAL (No/Yes): logistic regression on Src,
# summarised directly without storing the fit.
summary(
  glm(
    formula = MENTAL ~ Src,
    family = "binomial",
    data = JD2
  )
)
##
## Call:
## glm(formula = MENTAL ~ Src, family = "binomial", data = JD2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.034 -1.022 -1.022 1.342 1.342
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.37828 0.13917 -2.718 0.00657 **
## SrcDoc 0.02997 0.23432 0.128 0.89822
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 446.51 on 329 degrees of freedom
## Residual deviance: 446.49 on 328 degrees of freedom
## (44 observations deleted due to missingness)
## AIC: 450.49
##
## Number of Fisher Scoring iterations: 4
# Source comparison for SU (No/Yes): logistic regression on Src,
# summarised directly without storing the fit.
summary(
  glm(
    formula = SU ~ Src,
    family = "binomial",
    data = JD2
  )
)
##
## Call:
## glm(formula = SU ~ Src, family = "binomial", data = JD2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.0447 -1.0447 -0.4968 1.3162 2.0754
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.3205 0.1385 -2.314 0.0207 *
## SrcDoc -1.7097 0.3259 -5.246 1.55e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 406.70 on 325 degrees of freedom
## Residual deviance: 371.66 on 324 degrees of freedom
## (48 observations deleted due to missingness)
## AIC: 375.66
##
## Number of Fisher Scoring iterations: 4
# Source comparison for PHYS (No/Yes): logistic regression on Src,
# summarised directly without storing the fit.
summary(
  glm(
    formula = PHYS ~ Src,
    family = "binomial",
    data = JD2
  )
)
##
## Call:
## glm(formula = PHYS ~ Src, family = "binomial", data = JD2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.1037 -0.8351 -0.8351 1.2530 1.5639
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.8741 0.1500 -5.828 5.6e-09 ***
## SrcDoc 0.6983 0.2405 2.903 0.0037 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 424.97 on 327 degrees of freedom
## Residual deviance: 416.54 on 326 degrees of freedom
## (46 observations deleted due to missingness)
## AIC: 420.54
##
## Number of Fisher Scoring iterations: 4
# Source comparison for Withdraw (No/Yes): logistic regression on Src,
# summarised directly without storing the fit.
summary(
  glm(
    formula = Withdraw ~ Src,
    family = "binomial",
    data = JD2
  )
)
##
## Call:
## glm(formula = Withdraw ~ Src, family = "binomial", data = JD2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.9830 -0.9830 -0.3609 1.3851 2.3510
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.4761 0.1406 -3.386 0.00071 ***
## SrcDoc -2.2224 0.4150 -5.355 8.56e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 381.58 on 324 degrees of freedom
## Residual deviance: 337.12 on 323 degrees of freedom
## (49 observations deleted due to missingness)
## AIC: 341.12
##
## Number of Fisher Scoring iterations: 5