The Correctional Offender Management Profiling for Alternative Sanctions (COMPAS) tool is a risk and needs assessment instrument created by Northpointe, Inc. and used, in part, to assess risk of recidivism. The validity of COMPAS in the management of criminal offenders has been questioned by many groups, including the journalists at ProPublica. The following analyses are presented to determine whether there are potential biases in COMPAS scores related to an offender's sex (male, female). To ensure equitable application within the justice system, it is paramount that scores such as COMPAS do not introduce unwarranted disparities for specific subpopulations.
In Broward County, Florida, COMPAS scores are assigned at the time of booking into jail. These scores include predictions for “Risk of Recidivism” and “Risk of Violent Recidivism”. These scores are compared with actual occurrences of recidivism, specifically recidivism of a violent nature. Applying Northpointe’s COMPAS tool, defendants are given a score of 1-10 in each construct. Scores 1 to 4 are considered “low risk”, 5 to 7 “medium risk”, and 8 to 10 “high risk”.
A dataset containing COMPAS scores, public criminal records, and demographic information including race and sex was compiled by ProPublica and is [publicly available](https://raw.githubusercontent.com/propublica/compas-analysis/master/cox-violent-parsed.csv).
# Load ProPublica's parsed COMPAS dataset straight from GitHub.
# stringsAsFactors is set explicitly: R >= 4.0 defaults it to FALSE, but the
# rest of this script expects text columns such as sex and race to be factors
# (the summary() level counts and the later relevel() calls depend on it).
data <- read.csv(
  url("https://raw.githubusercontent.com/propublica/compas-analysis/master/cox-violent-parsed.csv"),
  stringsAsFactors = TRUE
)
# Rows x columns of the raw file
dim(data)
## [1] 18316 52
# List every column available in the raw data
colnames(data)
## [1] "id" "name"
## [3] "first" "last"
## [5] "compas_screening_date" "sex"
## [7] "dob" "age"
## [9] "age_cat" "race"
## [11] "juv_fel_count" "decile_score"
## [13] "juv_misd_count" "juv_other_count"
## [15] "priors_count" "days_b_screening_arrest"
## [17] "c_jail_in" "c_jail_out"
## [19] "c_case_number" "c_offense_date"
## [21] "c_arrest_date" "c_days_from_compas"
## [23] "c_charge_degree" "c_charge_desc"
## [25] "is_recid" "r_case_number"
## [27] "r_charge_degree" "r_days_from_arrest"
## [29] "r_offense_date" "r_charge_desc"
## [31] "r_jail_in" "r_jail_out"
## [33] "violent_recid" "is_violent_recid"
## [35] "vr_case_number" "vr_charge_degree"
## [37] "vr_offense_date" "vr_charge_desc"
## [39] "type_of_assessment" "decile_score.1"
## [41] "score_text" "screening_date"
## [43] "v_type_of_assessment" "v_decile_score"
## [45] "v_score_text" "v_screening_date"
## [47] "in_custody" "out_custody"
## [49] "priors_count.1" "start"
## [51] "end" "event"
# Reduce to the variables of interest for convenience.
# Columns are picked by name (not by position) so the selection fails loudly,
# instead of silently grabbing the wrong fields, if the upstream CSV ever
# gains, loses, or reorders columns. The order below matches the original
# positional selection, so downstream output is unchanged.
keep_cols <- c(
  "id", "sex", "age", "age_cat", "race",
  "days_b_screening_arrest", "is_recid",
  "violent_recid", "is_violent_recid",
  "decile_score.1", "score_text",
  "v_decile_score", "v_score_text", "v_screening_date",
  "priors_count.1", "start", "end", "event"
)
datred <- data[keep_cols]
summary(datred)
## id sex age age_cat
## Min. : 1 Female: 3383 Min. :18.00 25 - 45 :10372
## 1st Qu.: 2772 Male :14933 1st Qu.:25.00 Greater than 45: 3661
## Median : 5489 Median :31.00 Less than 25 : 4283
## Mean : 5493 Mean :34.02
## 3rd Qu.: 8205 3rd Qu.:41.00
## Max. :11001 Max. :96.00
##
## race days_b_screening_arrest is_recid
## African-American:9791 Min. :-597.000 Min. :-1.0000
## Asian : 71 1st Qu.: -1.000 1st Qu.: 0.0000
## Caucasian :6086 Median : -1.000 Median : 0.0000
## Hispanic :1451 Mean : 4.303 Mean : 0.4148
## Native American : 57 3rd Qu.: 0.000 3rd Qu.: 1.0000
## Other : 860 Max. :1057.000 Max. : 1.0000
## NA's :1297
## violent_recid is_violent_recid decile_score.1 score_text
## Mode:logical Min. :0.00000 Min. :-1.000 High :4614
## NA's:18316 1st Qu.:0.00000 1st Qu.: 2.000 Low :8597
## Median :0.00000 Median : 5.000 Medium:5082
## Mean :0.07311 Mean : 4.997 N/A : 23
## 3rd Qu.:0.00000 3rd Qu.: 8.000
## Max. :1.00000 Max. :10.000
##
## v_decile_score v_score_text v_screening_date priors_count.1
## Min. :-1.000 High : 2377 2013-03-20: 72 Min. : 0.000
## 1st Qu.: 2.000 Low :11147 2013-02-07: 68 1st Qu.: 0.000
## Median : 4.000 Medium: 4786 2013-04-20: 68 Median : 2.000
## Mean : 4.023 N/A : 6 2014-11-12: 67 Mean : 3.913
## 3rd Qu.: 6.000 2013-02-14: 65 3rd Qu.: 5.000
## Max. :10.000 2013-01-12: 64 Max. :43.000
## (Other) :17912
## start end event
## Min. : 0.0 Min. : 0.0 Min. :0.00000
## 1st Qu.: 0.0 1st Qu.: 364.0 1st Qu.:0.00000
## Median : 7.0 Median : 634.0 Median :0.00000
## Mean : 186.4 Mean : 625.2 Mean :0.04471
## 3rd Qu.: 309.0 3rd Qu.: 897.0 3rd Qu.:0.00000
## Max. :1197.0 Max. :1187.0 Max. :1.00000
##
# Clean-up: keep only rows whose start time is strictly before the end time
dat2 <- datred[datred$start < datred$end, ]
dim(dat2)
## [1] 18200 18
# Keep one row per offender: the first row encountered for each id
dat <- dat2[!duplicated(dat2[["id"]]), , drop = FALSE]
dim(dat)
## [1] 10999 18
# Convert the identifier and the outcome indicators to factors
factor_cols <- c("id", "is_recid", "event", "is_violent_recid")
dat[factor_cols] <- lapply(dat[factor_cols], as.factor)

# Optional checks of the values present before filtering erroneous datapoints
# levels(dat$decile_score.1)
# levels(dat$v_decile_score)
# levels(dat$priors_count.1)

# Drop rows carrying the -1 placeholder in either decile score or in the
# recidivism flag, and keep only rows whose days_b_screening_arrest lies
# within [-30, 30]. Rows where the condition evaluates to NA are dropped by
# subset() as well.
datfin <- subset(
  dat,
  decile_score.1 != "-1" &
    v_decile_score != "-1" &
    is_recid != "-1" &
    days_b_screening_arrest >= -30 &
    days_b_screening_arrest <= 30
)
# Give the categorical variables an explicit low-to-high ordering
datfin$age_cat <- factor(
  datfin$age_cat,
  levels = c("Less than 25", "25 - 45", "Greater than 45"),
  ordered = TRUE
)
datfin$score_text <- factor(
  datfin$score_text,
  levels = c("Low", "Medium", "High"),
  ordered = TRUE
)
datfin$v_score_text <- factor(
  datfin$v_score_text,
  levels = c("Low", "Medium", "High"),
  ordered = TRUE
)

# Time at risk: length of the start/end interval
datfin$t_atrisk <- with(datfin, end - start)

# Consolidated race (Asian and Native American folded into "Other") and a
# combined sex/race label
library(plyr)
datfin$consolrace <- revalue(
  datfin$race,
  replace = c("Asian" = "Other", "Native American" = "Other")
)
datfin$sexrace <- with(datfin, paste(sex, consolrace))

summary(datfin)
## id sex age age_cat
## 1 : 1 Female:1794 Min. :18.00 Less than 25 :1884
## 3 : 1 Male :6985 1st Qu.:25.00 25 - 45 :5023
## 4 : 1 Median :31.00 Greater than 45:1872
## 7 : 1 Mean :34.72
## 8 : 1 3rd Qu.:42.00
## 9 : 1 Max. :96.00
## (Other):8773
## race days_b_screening_arrest is_recid violent_recid
## African-American:4382 Min. :-30.00 -1: 0 Mode:logical
## Asian : 46 1st Qu.: -1.00 0 :5790 NA's:8779
## Caucasian :3041 Median : -1.00 1 :2989
## Hispanic : 761 Mean : -1.79
## Native American : 23 3rd Qu.: -1.00
## Other : 526 Max. : 30.00
##
## is_violent_recid decile_score.1 score_text v_decile_score v_score_text
## 0:8088 Min. : 1.000 Low :5028 Min. : 1.00 Low :5997
## 1: 691 1st Qu.: 2.000 Medium:2187 1st Qu.: 1.00 Medium:1968
## Median : 4.000 High :1564 Median : 3.00 High : 814
## Mean : 4.305 Mean : 3.55
## 3rd Qu.: 7.000 3rd Qu.: 5.00
## Max. :10.000 Max. :10.00
##
## v_screening_date priors_count.1 start end
## 2013-04-20: 31 Min. : 0.000 Min. : 0.00 Min. : 1.0
## 2013-02-07: 29 1st Qu.: 0.000 1st Qu.: 0.00 1st Qu.: 257.0
## 2013-03-20: 29 Median : 1.000 Median : 0.00 Median : 597.0
## 2014-11-12: 27 Mean : 3.019 Mean : 28.95 Mean : 582.2
## 2013-02-14: 26 3rd Qu.: 4.000 3rd Qu.: 3.00 3rd Qu.: 863.0
## 2013-02-22: 26 Max. :38.000 Max. :1185.00 Max. :1186.0
## (Other) :8611
## event t_atrisk consolrace sexrace
## 0:8343 Min. : 1.0 African-American:4382 Length:8779
## 1: 436 1st Qu.: 211.0 Other : 595 Class :character
## Median : 564.0 Caucasian :3041 Mode :character
## Mean : 553.2 Hispanic : 761
## 3rd Qu.: 836.0
## Max. :1185.0
##
# Rows x columns of the final analysis dataset
dim(datfin)
## [1] 8779 21
# Preview the first rows of the final analysis dataset
head(datfin)
## id sex age age_cat race days_b_screening_arrest
## 1 1 Male 69 Greater than 45 Other -1
## 4 3 Male 34 25 - 45 African-American -1
## 5 4 Male 24 Less than 25 African-American -1
## 12 7 Male 44 25 - 45 Other 0
## 13 8 Male 41 25 - 45 Caucasian -1
## 15 9 Male 43 25 - 45 Other -1
## is_recid violent_recid is_violent_recid decile_score.1 score_text
## 1 0 NA 0 1 Low
## 4 1 NA 1 3 Low
## 5 1 NA 0 4 Low
## 12 0 NA 0 1 Low
## 13 1 NA 0 6 Medium
## 15 0 NA 0 4 Low
## v_decile_score v_score_text v_screening_date priors_count.1 start end event
## 1 1 Low 2013-08-14 0 0 327 0
## 4 1 Low 2013-01-27 0 9 159 1
## 5 3 Low 2013-04-14 4 0 63 0
## 12 1 Low 2013-11-30 0 1 853 0
## 13 2 Low 2014-02-19 14 5 40 0
## 15 3 Low 2013-08-30 3 0 265 0
## t_atrisk consolrace sexrace
## 1 327 Other Male Other
## 4 150 African-American Male African-American
## 5 63 African-American Male African-American
## 12 852 Other Male Other
## 13 35 Caucasian Male Caucasian
## 15 265 Other Male Other
Trends in the data:

* The majority of offenders in the dataset are male
    + 20.44% Female
    + 79.56% Male
* The female subset of the study population is slightly younger than the male subset (59.20% vs. 56.71% in the 25-45 age category).
* The proportion of minority male offenders is higher than the proportion of females in that same minority classification for all minority classifications (African American, Asian, Hispanic, Native American, Other), while the proportion of Caucasian males is lower than the proportion of females in their race category.
* Females tend to be assigned COMPAS scores in the low risk category more often than men, but also recidivate at lower frequencies than their male counterparts.
# Offender counts by sex
table(datfin$sex)
##
## Female Male
## 1794 6985
# Share of offenders by sex, in percent (missing values are not counted)
prop.table(table(datfin$sex)) * 100
##
## Female Male
## 20.43513 79.56487
library(ggplot2)
# Age category distribution, faceted by sex; bar heights are the proportions
# computed per sex group (group = sex)
ggplot(datfin, aes(x = age_cat, group = sex)) +
  geom_bar(
    mapping = aes(y = ..prop.., fill = factor(..x..)),
    stat = "count"
  ) +
  geom_text(
    mapping = aes(label = scales::percent(..prop..), y = ..prop..),
    stat = "count",
    vjust = -.5
  ) +
  labs(y = "Percent", x = "Age", fill = "Age") +
  facet_grid(~sex) +
  scale_y_continuous(labels = scales::percent)

# Overall race counts
ggplot(datfin, aes(race)) +
  geom_bar(fill = "blue")

# Race distribution, faceted by sex; x-axis labels are rotated 45 degrees
raceplot <- ggplot(datfin, aes(x = race, group = sex)) +
  geom_bar(
    mapping = aes(y = ..prop.., fill = factor(..x..)),
    stat = "count"
  ) +
  geom_text(
    mapping = aes(label = scales::percent(..prop..), y = ..prop..),
    size = 3,
    stat = "count",
    vjust = -.5
  ) +
  labs(y = "Percent", x = "Race", fill = "Race") +
  facet_grid(~sex) +
  scale_y_continuous(labels = scales::percent)
raceplot +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
# Overall counts per general-recidivism risk category
ggplot(datfin, aes(score_text)) +
  geom_bar(fill = "blue")

# Risk category distribution, faceted by sex (proportions per sex group)
ggplot(datfin, aes(x = score_text, group = sex)) +
  geom_bar(
    mapping = aes(y = ..prop.., fill = factor(..x..)),
    stat = "count"
  ) +
  geom_text(
    mapping = aes(label = scales::percent(..prop..), y = ..prop..),
    size = 3,
    stat = "count",
    vjust = -0.7,
    hjust = 0.4
  ) +
  labs(y = "Percent", x = "Decile category", fill = "Decile") +
  facet_grid(~sex) +
  scale_y_continuous(labels = scales::percent)

# Overall counts of the recidivism indicator
ggplot(datfin, aes(is_recid)) +
  geom_bar(fill = "blue")

# Recidivism indicator distribution, faceted by sex (proportions per sex group)
ggplot(datfin, aes(x = is_recid, group = sex)) +
  geom_bar(
    mapping = aes(y = ..prop.., fill = factor(..x..)),
    stat = "count"
  ) +
  geom_text(
    mapping = aes(label = scales::percent(..prop..), y = ..prop..),
    size = 3,
    stat = "count",
    vjust = -0.7,
    hjust = 0.4
  ) +
  labs(y = "Percent", x = "Recidivism", fill = "Recidivism") +
  facet_grid(~sex) +
  scale_y_continuous(labels = scales::percent)
# Overall counts per violent-recidivism risk category
ggplot(datfin, aes(v_score_text)) +
  geom_bar(fill = "blue")

# Violent risk category distribution, faceted by sex (proportions per sex group)
ggplot(datfin, aes(x = v_score_text, group = sex)) +
  geom_bar(
    mapping = aes(y = ..prop.., fill = factor(..x..)),
    stat = "count"
  ) +
  geom_text(
    mapping = aes(label = scales::percent(..prop..), y = ..prop..),
    size = 3,
    stat = "count",
    vjust = -0.7,
    hjust = 0.4
  ) +
  labs(y = "Percent", x = "Violent Decile category", fill = "V Decile") +
  facet_grid(~sex) +
  scale_y_continuous(labels = scales::percent)

# Overall counts of the violent recidivism indicator
ggplot(datfin, aes(is_violent_recid)) +
  geom_bar(fill = "blue")

# Violent recidivism indicator distribution, faceted by sex
ggplot(datfin, aes(x = is_violent_recid, group = sex)) +
  geom_bar(
    mapping = aes(y = ..prop.., fill = factor(..x..)),
    stat = "count"
  ) +
  geom_text(
    mapping = aes(label = scales::percent(..prop..), y = ..prop..),
    size = 3,
    stat = "count",
    vjust = -0.7,
    hjust = 0.4
  ) +
  labs(y = "Percent", x = "Violent Recidivism", fill = "V Recid") +
  facet_grid(~sex) +
  scale_y_continuous(labels = scales::percent)
library(survival)
library(ggfortify)
# Survival response: time at risk, with violent recidivism as the event.
# NOTE(review): the event indicator here is is_violent_recid, not the `event`
# column, while t_atrisk comes from a single start/end interval per offender
# -- confirm this pairing is the intended one.
survobj <- Surv(datfin$t_atrisk, datfin$is_violent_recid == 1)

# Kaplan-Meier estimate for the whole analysis population
fit0 <- survfit(survobj ~ 1, data = datfin)
summary(fit0)
## Call: survfit(formula = survobj ~ 1, data = datfin)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 1 8779 3 1.000 0.000197 0.999 1.000
## 2 8670 3 0.999 0.000281 0.999 1.000
## 3 8660 3 0.999 0.000344 0.998 1.000
## 4 8647 8 0.998 0.000474 0.997 0.999
## 5 8629 2 0.998 0.000502 0.997 0.999
## 6 8613 2 0.998 0.000528 0.997 0.999
## 7 8598 2 0.997 0.000552 0.996 0.998
## 8 8573 2 0.997 0.000576 0.996 0.998
## 9 8551 3 0.997 0.000610 0.996 0.998
## 10 8535 4 0.996 0.000653 0.995 0.998
## 11 8525 1 0.996 0.000664 0.995 0.997
## 12 8513 2 0.996 0.000684 0.995 0.997
## 13 8505 5 0.995 0.000732 0.994 0.997
## 14 8486 4 0.995 0.000768 0.993 0.996
## 15 8475 3 0.995 0.000794 0.993 0.996
## 16 8459 5 0.994 0.000836 0.992 0.996
## 17 8444 1 0.994 0.000844 0.992 0.995
## 18 8429 2 0.994 0.000861 0.992 0.995
## 19 8412 2 0.993 0.000876 0.992 0.995
## 20 8394 5 0.993 0.000915 0.991 0.995
## 21 8377 3 0.992 0.000937 0.991 0.994
## 22 8361 5 0.992 0.000974 0.990 0.994
## 23 8344 2 0.992 0.000988 0.990 0.994
## 24 8334 4 0.991 0.001016 0.989 0.993
## 25 8318 6 0.990 0.001056 0.988 0.992
## 26 8303 1 0.990 0.001063 0.988 0.992
## 27 8292 6 0.990 0.001101 0.987 0.992
## 28 8281 3 0.989 0.001120 0.987 0.991
## 29 8266 2 0.989 0.001133 0.987 0.991
## 30 8248 6 0.988 0.001169 0.986 0.991
## 31 8228 4 0.988 0.001193 0.985 0.990
## 32 8217 2 0.988 0.001205 0.985 0.990
## 33 8207 5 0.987 0.001234 0.985 0.989
## 34 8191 2 0.987 0.001245 0.984 0.989
## 35 8172 1 0.987 0.001251 0.984 0.989
## 36 8158 1 0.986 0.001257 0.984 0.989
## 37 8147 4 0.986 0.001279 0.983 0.988
## 38 8133 1 0.986 0.001285 0.983 0.988
## 39 8115 1 0.986 0.001290 0.983 0.988
## 40 8102 2 0.985 0.001301 0.983 0.988
## 41 8091 4 0.985 0.001323 0.982 0.988
## 42 8081 2 0.985 0.001334 0.982 0.987
## 43 8071 1 0.985 0.001340 0.982 0.987
## 44 8061 2 0.984 0.001350 0.982 0.987
## 45 8052 1 0.984 0.001356 0.982 0.987
## 47 8031 4 0.984 0.001377 0.981 0.986
## 48 8019 3 0.983 0.001393 0.981 0.986
## 49 8003 3 0.983 0.001408 0.980 0.986
## 50 7989 4 0.983 0.001429 0.980 0.985
## 51 7974 5 0.982 0.001454 0.979 0.985
## 52 7953 3 0.982 0.001470 0.979 0.984
## 53 7938 1 0.981 0.001475 0.979 0.984
## 54 7923 1 0.981 0.001480 0.978 0.984
## 55 7916 4 0.981 0.001499 0.978 0.984
## 57 7886 2 0.981 0.001509 0.978 0.984
## 58 7876 3 0.980 0.001524 0.977 0.983
## 59 7864 2 0.980 0.001534 0.977 0.983
## 60 7855 3 0.980 0.001548 0.977 0.983
## 61 7839 1 0.979 0.001553 0.976 0.982
## 62 7827 2 0.979 0.001563 0.976 0.982
## 63 7811 2 0.979 0.001573 0.976 0.982
## 64 7804 1 0.979 0.001577 0.976 0.982
## 65 7791 2 0.979 0.001587 0.975 0.982
## 66 7782 6 0.978 0.001615 0.975 0.981
## 67 7766 3 0.977 0.001629 0.974 0.981
## 68 7752 2 0.977 0.001639 0.974 0.980
## 69 7740 2 0.977 0.001648 0.974 0.980
## 70 7723 2 0.977 0.001657 0.973 0.980
## 72 7701 2 0.976 0.001666 0.973 0.980
## 73 7691 3 0.976 0.001680 0.973 0.979
## 74 7682 3 0.976 0.001694 0.972 0.979
## 75 7673 1 0.976 0.001698 0.972 0.979
## 76 7662 3 0.975 0.001712 0.972 0.978
## 78 7636 3 0.975 0.001726 0.971 0.978
## 79 7629 4 0.974 0.001744 0.971 0.978
## 80 7619 3 0.974 0.001757 0.970 0.977
## 83 7591 1 0.974 0.001761 0.970 0.977
## 84 7581 2 0.973 0.001770 0.970 0.977
## 85 7569 3 0.973 0.001783 0.970 0.977
## 86 7559 4 0.973 0.001801 0.969 0.976
## 87 7547 1 0.972 0.001805 0.969 0.976
## 88 7536 2 0.972 0.001814 0.969 0.976
## 89 7531 3 0.972 0.001827 0.968 0.975
## 90 7514 3 0.971 0.001840 0.968 0.975
## 91 7507 4 0.971 0.001857 0.967 0.975
## 92 7497 1 0.971 0.001861 0.967 0.974
## 93 7488 1 0.971 0.001866 0.967 0.974
## 94 7475 5 0.970 0.001887 0.966 0.974
## 95 7464 3 0.970 0.001899 0.966 0.973
## 96 7455 2 0.969 0.001908 0.966 0.973
## 98 7437 1 0.969 0.001912 0.965 0.973
## 99 7424 3 0.969 0.001925 0.965 0.973
## 100 7410 2 0.969 0.001933 0.965 0.972
## 101 7402 1 0.968 0.001937 0.965 0.972
## 102 7395 4 0.968 0.001954 0.964 0.972
## 104 7377 5 0.967 0.001974 0.963 0.971
## 106 7356 4 0.967 0.001991 0.963 0.971
## 107 7340 3 0.966 0.002003 0.962 0.970
## 108 7331 1 0.966 0.002007 0.962 0.970
## 110 7308 3 0.966 0.002019 0.962 0.970
## 115 7270 2 0.966 0.002027 0.962 0.970
## 117 7253 3 0.965 0.002040 0.961 0.969
## 119 7235 2 0.965 0.002048 0.961 0.969
## 120 7227 4 0.964 0.002064 0.960 0.968
## 121 7217 2 0.964 0.002072 0.960 0.968
## 123 7199 2 0.964 0.002080 0.960 0.968
## 126 7180 1 0.964 0.002084 0.960 0.968
## 127 7169 1 0.964 0.002088 0.959 0.968
## 129 7157 1 0.963 0.002092 0.959 0.967
## 130 7147 2 0.963 0.002100 0.959 0.967
## 131 7134 1 0.963 0.002104 0.959 0.967
## 132 7130 3 0.963 0.002116 0.958 0.967
## 134 7117 2 0.962 0.002124 0.958 0.966
## 136 7097 3 0.962 0.002136 0.958 0.966
## 137 7087 3 0.961 0.002148 0.957 0.966
## 139 7062 1 0.961 0.002152 0.957 0.966
## 140 7056 2 0.961 0.002160 0.957 0.965
## 142 7045 1 0.961 0.002164 0.957 0.965
## 144 7034 3 0.961 0.002176 0.956 0.965
## 145 7026 3 0.960 0.002188 0.956 0.964
## 146 7019 4 0.960 0.002204 0.955 0.964
## 147 7006 2 0.959 0.002212 0.955 0.964
## 148 6993 3 0.959 0.002224 0.955 0.963
## 149 6985 2 0.959 0.002231 0.954 0.963
## 150 6975 4 0.958 0.002247 0.954 0.962
## 151 6961 2 0.958 0.002255 0.953 0.962
## 152 6953 2 0.958 0.002263 0.953 0.962
## 153 6946 1 0.957 0.002266 0.953 0.962
## 154 6937 1 0.957 0.002270 0.953 0.962
## 156 6929 1 0.957 0.002274 0.953 0.962
## 158 6918 1 0.957 0.002278 0.953 0.961
## 159 6911 1 0.957 0.002282 0.952 0.961
## 161 6895 1 0.957 0.002286 0.952 0.961
## 163 6880 1 0.957 0.002290 0.952 0.961
## 164 6877 1 0.956 0.002294 0.952 0.961
## 165 6870 1 0.956 0.002298 0.952 0.961
## 166 6865 3 0.956 0.002309 0.951 0.960
## 167 6857 2 0.956 0.002317 0.951 0.960
## 168 6851 2 0.955 0.002325 0.951 0.960
## 169 6845 4 0.955 0.002340 0.950 0.959
## 170 6832 3 0.954 0.002351 0.950 0.959
## 171 6820 2 0.954 0.002359 0.949 0.959
## 172 6814 5 0.953 0.002378 0.949 0.958
## 173 6802 2 0.953 0.002385 0.948 0.958
## 174 6793 1 0.953 0.002389 0.948 0.958
## 176 6778 1 0.953 0.002393 0.948 0.957
## 177 6771 2 0.952 0.002401 0.948 0.957
## 180 6753 1 0.952 0.002404 0.948 0.957
## 181 6748 1 0.952 0.002408 0.948 0.957
## 182 6739 2 0.952 0.002416 0.947 0.957
## 185 6720 1 0.952 0.002420 0.947 0.957
## 186 6713 1 0.952 0.002423 0.947 0.956
## 187 6710 1 0.952 0.002427 0.947 0.956
## 188 6707 1 0.951 0.002431 0.947 0.956
## 189 6701 2 0.951 0.002438 0.946 0.956
## 190 6689 1 0.951 0.002442 0.946 0.956
## 191 6686 1 0.951 0.002446 0.946 0.956
## 195 6673 2 0.951 0.002454 0.946 0.955
## 197 6662 2 0.950 0.002461 0.945 0.955
## 198 6655 1 0.950 0.002465 0.945 0.955
## 199 6649 1 0.950 0.002469 0.945 0.955
## 202 6641 1 0.950 0.002472 0.945 0.955
## 203 6638 1 0.950 0.002476 0.945 0.955
## 204 6631 1 0.950 0.002480 0.945 0.954
## 205 6622 1 0.949 0.002484 0.945 0.954
## 209 6603 2 0.949 0.002491 0.944 0.954
## 212 6579 3 0.949 0.002503 0.944 0.954
## 213 6574 1 0.948 0.002506 0.944 0.953
## 214 6569 1 0.948 0.002510 0.943 0.953
## 216 6554 1 0.948 0.002514 0.943 0.953
## 217 6548 1 0.948 0.002518 0.943 0.953
## 218 6542 2 0.948 0.002525 0.943 0.953
## 219 6531 1 0.948 0.002529 0.943 0.953
## 220 6524 1 0.947 0.002533 0.943 0.952
## 222 6519 2 0.947 0.002540 0.942 0.952
## 223 6512 1 0.947 0.002544 0.942 0.952
## 224 6508 2 0.947 0.002552 0.942 0.952
## 226 6498 1 0.947 0.002555 0.942 0.952
## 229 6487 2 0.946 0.002563 0.941 0.951
## 230 6479 2 0.946 0.002570 0.941 0.951
## 231 6474 1 0.946 0.002574 0.941 0.951
## 232 6469 1 0.946 0.002578 0.941 0.951
## 233 6458 1 0.946 0.002582 0.941 0.951
## 236 6439 2 0.945 0.002589 0.940 0.950
## 238 6425 1 0.945 0.002593 0.940 0.950
## 241 6409 4 0.945 0.002608 0.939 0.950
## 242 6401 3 0.944 0.002619 0.939 0.949
## 243 6395 2 0.944 0.002627 0.939 0.949
## 247 6374 1 0.944 0.002631 0.939 0.949
## 250 6357 3 0.943 0.002642 0.938 0.948
## 251 6350 1 0.943 0.002646 0.938 0.948
## 252 6346 2 0.943 0.002653 0.938 0.948
## 253 6340 1 0.943 0.002657 0.937 0.948
## 255 6325 1 0.942 0.002661 0.937 0.948
## 256 6320 2 0.942 0.002668 0.937 0.947
## 257 6314 1 0.942 0.002672 0.937 0.947
## 259 6302 3 0.942 0.002683 0.936 0.947
## 261 6294 1 0.941 0.002687 0.936 0.947
## 262 6291 1 0.941 0.002691 0.936 0.947
## 263 6288 3 0.941 0.002702 0.936 0.946
## 266 6277 2 0.941 0.002709 0.935 0.946
## 267 6273 1 0.940 0.002713 0.935 0.946
## 268 6267 1 0.940 0.002717 0.935 0.946
## 269 6265 1 0.940 0.002720 0.935 0.945
## 272 6250 1 0.940 0.002724 0.935 0.945
## 274 6240 1 0.940 0.002728 0.934 0.945
## 275 6238 1 0.940 0.002732 0.934 0.945
## 277 6229 1 0.939 0.002735 0.934 0.945
## 278 6223 1 0.939 0.002739 0.934 0.945
## 279 6218 2 0.939 0.002747 0.934 0.944
## 280 6213 1 0.939 0.002750 0.934 0.944
## 281 6209 1 0.939 0.002754 0.933 0.944
## 282 6206 1 0.939 0.002758 0.933 0.944
## 283 6203 1 0.938 0.002761 0.933 0.944
## 284 6199 1 0.938 0.002765 0.933 0.944
## 285 6198 1 0.938 0.002769 0.933 0.944
## 287 6189 1 0.938 0.002772 0.933 0.943
## 289 6178 1 0.938 0.002776 0.932 0.943
## 293 6161 1 0.938 0.002780 0.932 0.943
## 294 6154 1 0.938 0.002784 0.932 0.943
## 295 6151 1 0.937 0.002787 0.932 0.943
## 296 6148 2 0.937 0.002795 0.932 0.943
## 297 6143 1 0.937 0.002798 0.931 0.942
## 303 6118 1 0.937 0.002802 0.931 0.942
## 305 6113 1 0.937 0.002806 0.931 0.942
## 308 6103 1 0.936 0.002810 0.931 0.942
## 309 6098 1 0.936 0.002813 0.931 0.942
## 311 6093 1 0.936 0.002817 0.931 0.942
## 312 6089 2 0.936 0.002825 0.930 0.941
## 313 6083 1 0.936 0.002828 0.930 0.941
## 314 6079 1 0.936 0.002832 0.930 0.941
## 318 6059 2 0.935 0.002839 0.930 0.941
## 320 6052 2 0.935 0.002847 0.929 0.941
## 323 6039 1 0.935 0.002851 0.929 0.940
## 325 6030 1 0.935 0.002854 0.929 0.940
## 326 6025 3 0.934 0.002866 0.929 0.940
## 329 6010 1 0.934 0.002869 0.928 0.940
## 330 6004 3 0.934 0.002881 0.928 0.939
## 332 5998 1 0.933 0.002884 0.928 0.939
## 334 5991 1 0.933 0.002888 0.928 0.939
## 335 5988 1 0.933 0.002892 0.927 0.939
## 337 5974 1 0.933 0.002895 0.927 0.939
## 338 5968 1 0.933 0.002899 0.927 0.938
## 341 5955 2 0.932 0.002907 0.927 0.938
## 343 5948 1 0.932 0.002910 0.927 0.938
## 345 5944 1 0.932 0.002914 0.926 0.938
## 350 5928 2 0.932 0.002922 0.926 0.938
## 352 5920 1 0.932 0.002925 0.926 0.937
## 356 5902 1 0.931 0.002929 0.926 0.937
## 357 5896 3 0.931 0.002940 0.925 0.937
## 358 5891 1 0.931 0.002944 0.925 0.937
## 359 5887 1 0.931 0.002948 0.925 0.936
## 363 5872 1 0.931 0.002952 0.925 0.936
## 367 5854 1 0.930 0.002955 0.925 0.936
## 368 5849 2 0.930 0.002963 0.924 0.936
## 369 5840 1 0.930 0.002967 0.924 0.936
## 370 5837 1 0.930 0.002970 0.924 0.936
## 372 5826 1 0.930 0.002974 0.924 0.935
## 373 5822 1 0.929 0.002978 0.924 0.935
## 374 5817 2 0.929 0.002986 0.923 0.935
## 375 5814 1 0.929 0.002989 0.923 0.935
## 376 5812 1 0.929 0.002993 0.923 0.935
## 377 5808 1 0.929 0.002997 0.923 0.935
## 379 5801 1 0.928 0.003001 0.923 0.934
## 380 5797 2 0.928 0.003008 0.922 0.934
## 384 5783 1 0.928 0.003012 0.922 0.934
## 386 5775 1 0.928 0.003016 0.922 0.934
## 387 5770 3 0.927 0.003027 0.921 0.933
## 391 5751 1 0.927 0.003031 0.921 0.933
## 395 5735 1 0.927 0.003034 0.921 0.933
## 396 5733 1 0.927 0.003038 0.921 0.933
## 399 5715 1 0.927 0.003042 0.921 0.933
## 401 5713 1 0.927 0.003046 0.921 0.933
## 404 5703 1 0.926 0.003050 0.920 0.932
## 406 5696 1 0.926 0.003053 0.920 0.932
## 407 5690 1 0.926 0.003057 0.920 0.932
## 410 5681 1 0.926 0.003061 0.920 0.932
## 412 5676 1 0.926 0.003065 0.920 0.932
## 414 5673 3 0.925 0.003076 0.919 0.931
## 415 5665 1 0.925 0.003080 0.919 0.931
## 416 5657 1 0.925 0.003084 0.919 0.931
## 418 5651 1 0.925 0.003087 0.919 0.931
## 421 5646 1 0.925 0.003091 0.919 0.931
## 427 5621 1 0.924 0.003095 0.918 0.930
## 430 5607 1 0.924 0.003099 0.918 0.930
## 432 5598 1 0.924 0.003103 0.918 0.930
## 434 5593 1 0.924 0.003107 0.918 0.930
## 436 5581 3 0.923 0.003118 0.917 0.930
## 438 5573 1 0.923 0.003122 0.917 0.929
## 443 5554 1 0.923 0.003126 0.917 0.929
## 446 5542 1 0.923 0.003130 0.917 0.929
## 447 5538 1 0.923 0.003134 0.917 0.929
## 448 5533 1 0.923 0.003137 0.916 0.929
## 449 5529 1 0.922 0.003141 0.916 0.929
## 450 5525 1 0.922 0.003145 0.916 0.928
## 451 5521 1 0.922 0.003149 0.916 0.928
## 453 5513 1 0.922 0.003153 0.916 0.928
## 455 5506 2 0.922 0.003161 0.915 0.928
## 457 5495 1 0.921 0.003165 0.915 0.928
## 459 5483 1 0.921 0.003168 0.915 0.927
## 462 5445 1 0.921 0.003172 0.915 0.927
## 466 5403 1 0.921 0.003176 0.915 0.927
## 470 5357 1 0.921 0.003180 0.915 0.927
## 471 5346 1 0.921 0.003184 0.914 0.927
## 472 5332 1 0.920 0.003189 0.914 0.927
## 473 5316 1 0.920 0.003193 0.914 0.926
## 476 5293 1 0.920 0.003197 0.914 0.926
## 479 5260 1 0.920 0.003201 0.914 0.926
## 480 5245 1 0.920 0.003205 0.913 0.926
## 482 5222 1 0.920 0.003209 0.913 0.926
## 486 5185 1 0.919 0.003214 0.913 0.926
## 487 5181 1 0.919 0.003218 0.913 0.925
## 489 5172 1 0.919 0.003222 0.913 0.925
## 491 5154 1 0.919 0.003226 0.912 0.925
## 493 5128 2 0.918 0.003235 0.912 0.925
## 498 5072 1 0.918 0.003240 0.912 0.925
## 499 5059 1 0.918 0.003244 0.912 0.924
## 503 5012 1 0.918 0.003249 0.912 0.924
## 506 4980 1 0.918 0.003253 0.911 0.924
## 507 4963 1 0.918 0.003258 0.911 0.924
## 508 4951 1 0.917 0.003262 0.911 0.924
## 511 4925 1 0.917 0.003267 0.911 0.924
## 521 4815 1 0.917 0.003272 0.911 0.923
## 522 4807 1 0.917 0.003277 0.910 0.923
## 523 4798 1 0.917 0.003282 0.910 0.923
## 528 4757 2 0.916 0.003292 0.910 0.923
## 538 4653 1 0.916 0.003297 0.910 0.922
## 539 4643 1 0.916 0.003302 0.909 0.922
## 540 4631 1 0.916 0.003307 0.909 0.922
## 544 4595 1 0.915 0.003312 0.909 0.922
## 547 4573 2 0.915 0.003323 0.909 0.922
## 548 4563 1 0.915 0.003328 0.908 0.921
## 551 4527 1 0.915 0.003334 0.908 0.921
## 555 4502 2 0.914 0.003345 0.908 0.921
## 556 4488 1 0.914 0.003350 0.907 0.921
## 565 4379 1 0.914 0.003356 0.907 0.920
## 571 4339 1 0.914 0.003362 0.907 0.920
## 574 4320 1 0.913 0.003367 0.907 0.920
## 576 4303 1 0.913 0.003373 0.907 0.920
## 577 4295 1 0.913 0.003379 0.906 0.920
## 578 4292 2 0.913 0.003391 0.906 0.919
## 587 4233 2 0.912 0.003403 0.905 0.919
## 601 4120 1 0.912 0.003409 0.905 0.919
## 609 4050 1 0.912 0.003416 0.905 0.918
## 613 4009 1 0.911 0.003423 0.905 0.918
## 614 3996 1 0.911 0.003430 0.904 0.918
## 626 3910 1 0.911 0.003437 0.904 0.918
## 628 3888 1 0.911 0.003444 0.904 0.917
## 631 3869 1 0.910 0.003451 0.904 0.917
## 632 3858 1 0.910 0.003458 0.903 0.917
## 639 3791 1 0.910 0.003465 0.903 0.917
## 645 3752 1 0.910 0.003473 0.903 0.917
## 646 3738 1 0.910 0.003481 0.903 0.916
## 649 3697 2 0.909 0.003496 0.902 0.916
## 651 3680 1 0.909 0.003504 0.902 0.916
## 653 3666 1 0.909 0.003512 0.902 0.915
## 657 3629 2 0.908 0.003527 0.901 0.915
## 662 3586 1 0.908 0.003536 0.901 0.915
## 665 3565 1 0.908 0.003544 0.901 0.914
## 668 3535 1 0.907 0.003552 0.900 0.914
## 673 3478 1 0.907 0.003560 0.900 0.914
## 685 3401 1 0.907 0.003569 0.900 0.914
## 691 3353 3 0.906 0.003597 0.899 0.913
## 705 3248 1 0.906 0.003607 0.899 0.913
## 718 3145 1 0.905 0.003617 0.898 0.912
## 719 3133 1 0.905 0.003627 0.898 0.912
## 727 3070 1 0.905 0.003638 0.898 0.912
## 733 3026 1 0.904 0.003649 0.897 0.912
## 741 2969 1 0.904 0.003661 0.897 0.911
## 743 2957 1 0.904 0.003672 0.897 0.911
## 747 2921 2 0.903 0.003696 0.896 0.911
## 754 2863 1 0.903 0.003708 0.896 0.910
## 759 2827 1 0.903 0.003720 0.895 0.910
## 763 2795 1 0.902 0.003733 0.895 0.910
## 767 2760 1 0.902 0.003746 0.895 0.909
## 771 2739 1 0.902 0.003759 0.894 0.909
## 772 2730 1 0.901 0.003772 0.894 0.909
## 775 2701 1 0.901 0.003785 0.894 0.908
## 776 2691 1 0.901 0.003799 0.893 0.908
## 779 2668 1 0.900 0.003812 0.893 0.908
## 794 2521 1 0.900 0.003827 0.892 0.907
## 796 2504 1 0.900 0.003843 0.892 0.907
## 815 2365 1 0.899 0.003860 0.892 0.907
## 827 2271 1 0.899 0.003878 0.891 0.906
## 858 2032 1 0.898 0.003902 0.891 0.906
## 869 1960 1 0.898 0.003926 0.890 0.906
## 878 1896 1 0.897 0.003953 0.890 0.905
## 881 1875 1 0.897 0.003980 0.889 0.905
## 889 1829 1 0.896 0.004007 0.889 0.904
## 891 1815 1 0.896 0.004036 0.888 0.904
## 892 1805 1 0.895 0.004064 0.888 0.903
## 897 1761 1 0.895 0.004093 0.887 0.903
## 902 1733 1 0.894 0.004123 0.886 0.903
## 906 1703 1 0.894 0.004154 0.886 0.902
## 943 1428 1 0.893 0.004198 0.885 0.902
## 946 1420 1 0.893 0.004242 0.884 0.901
## 968 1264 1 0.892 0.004297 0.884 0.900
## 1038 1079 1 0.891 0.004372 0.883 0.900
## 1039 1068 1 0.890 0.004447 0.882 0.899
## 1048 1004 1 0.889 0.004530 0.881 0.898
## 1082 752 1 0.888 0.004676 0.879 0.897
# Overall Kaplan-Meier curve
plot(
  fit0,
  xlab = "Time at risk of violent recidivism in Days",
  ylab = "% not rearrested",
  yscale = 100,
  main = "Survival Distribution (Overall)"
)

# Kaplan-Meier curves stratified by sex; one colour per stratum, shared by the
# plot and its legend
sex_cols <- c("red", "blue")
fitr <- survfit(survobj ~ sex, data = datfin)
plot(
  fitr,
  xlab = "Time at risk of violent recidivism in Days",
  ylab = "% not rearrested",
  yscale = 100,
  main = "Survival Distribution by sex",
  col = sex_cols
)
legend(
  "bottomleft",
  legend = levels(as.factor(datfin$sex)),
  col = sex_cols,
  lty = 1
)

# Kaplan-Meier curves stratified by the combined sex/race label
sexrace_cols <- c(
  "red", "orange", "yellow", "green",
  "blue", "darkblue", "darkgreen", "hotpink"
)
fitr2 <- survfit(survobj ~ sexrace, data = datfin)
plot(
  fitr2,
  xlab = "Time at risk of violent recidivism in Days",
  ylab = "% not rearrested",
  yscale = 100,
  main = "Survival Distribution by sex/race",
  col = sexrace_cols
)
legend(
  "bottomleft",
  legend = levels(as.factor(datfin$sexrace)),
  col = sexrace_cols,
  lty = 1
)

# Log-rank test comparing the male and female survival curves
survdiff(survobj ~ datfin$sex)
## Call:
## survdiff(formula = survobj ~ datfin$sex)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## datfin$sex=Female 1794 82 153 33.24 42.8
## datfin$sex=Male 6985 609 538 9.49 42.8
##
## Chisq= 42.8 on 1 degrees of freedom, p= 6e-11
Based on the Kaplan-Meier curve stratified by sex, there is an apparent difference in time to violent recidivism between male and female offenders.
A Log-Rank Test comparing male and female offenders' time to violent recidivism shows that, under the null hypothesis (that male and female offenders have the same time to violent recidivism), female offenders are expected to recidivate 1.86X more often than observed in this analysis.
The observed difference is further explored using Cox Proportional Hazards Modeling
*Collapsed Native American and Asian into the “Other” category for analyses including race as a covariate
RESULT HIGHLIGHTS
When controlling for violent decile score, age, and prior offense count, white women are at approximately half the risk of white males to recidivate. All other combinations of sex (male/female) and race (African American/Hispanic/Other) do not have a statistically significantly different risk of violent recidivism as compared to white males.
While prior offense count is significant in the model, a hazard ratio of 1.06 is of little practical interest, though it may point to additional improvements that can be made to the COMPAS score, given that it remains significant even after controlling for the violent decile score.
# Copy of the analysis data with "Caucasian" and "Male" as the reference
# levels for the interaction model
datfinref <- datfin
datfinref$consolrace <- relevel(datfinref$consolrace, ref = "Caucasian")
datfinref$sex <- relevel(datfinref$sex, ref = "Male")
# NOTE(review): this previews datfin, not the re-leveled datfinref -- confirm
# that is intended.
head(datfin)
## id sex age age_cat race days_b_screening_arrest
## 1 1 Male 69 Greater than 45 Other -1
## 4 3 Male 34 25 - 45 African-American -1
## 5 4 Male 24 Less than 25 African-American -1
## 12 7 Male 44 25 - 45 Other 0
## 13 8 Male 41 25 - 45 Caucasian -1
## 15 9 Male 43 25 - 45 Other -1
## is_recid violent_recid is_violent_recid decile_score.1 score_text
## 1 0 NA 0 1 Low
## 4 1 NA 1 3 Low
## 5 1 NA 0 4 Low
## 12 0 NA 0 1 Low
## 13 1 NA 0 6 Medium
## 15 0 NA 0 4 Low
## v_decile_score v_score_text v_screening_date priors_count.1 start end event
## 1 1 Low 2013-08-14 0 0 327 0
## 4 1 Low 2013-01-27 0 9 159 1
## 5 3 Low 2013-04-14 4 0 63 0
## 12 1 Low 2013-11-30 0 1 853 0
## 13 2 Low 2014-02-19 14 5 40 0
## 15 3 Low 2013-08-30 3 0 265 0
## t_atrisk consolrace sexrace
## 1 327 Other Male Other
## 4 150 African-American Male African-American
## 5 63 African-American Male African-American
## 12 852 Other Male Other
## 13 35 Caucasian Male Caucasian
## 15 265 Other Male Other
# Cox proportional hazards model: violent decile score only
cox_vdecile <- coxph(survobj ~ v_decile_score, data = datfin)
summary(cox_vdecile)
## Call:
## coxph(formula = survobj ~ v_decile_score, data = datfin)
##
## n= 8779, number of events= 691
##
## coef exp(coef) se(coef) z Pr(>|z|)
## v_decile_score 0.26203 1.29957 0.01393 18.81 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## v_decile_score 1.3 0.7695 1.265 1.336
##
## Concordance= 0.697 (se = 0.01 )
## Likelihood ratio test= 329.5 on 1 df, p=<2e-16
## Wald test = 354 on 1 df, p=<2e-16
## Score (logrank) test = 387.7 on 1 df, p=<2e-16
# Cox proportional hazards fit: violent decile score plus sex. A sex effect
# that remains after adjusting for the score means the score alone does not
# account for the difference between males and females.
summary(
  coxph(
    survobj ~ v_decile_score + sex,
    data = datfin
  )
)
## Call:
## coxph(formula = survobj ~ v_decile_score + sex, data = datfin)
##
## n= 8779, number of events= 691
##
## coef exp(coef) se(coef) z Pr(>|z|)
## v_decile_score 0.25273 1.28754 0.01398 18.077 < 2e-16 ***
## sexMale 0.56490 1.75927 0.11845 4.769 1.85e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## v_decile_score 1.288 0.7767 1.253 1.323
## sexMale 1.759 0.5684 1.395 2.219
##
## Concordance= 0.703 (se = 0.01 )
## Likelihood ratio test= 355.6 on 2 df, p=<2e-16
## Wald test = 373.6 on 2 df, p=<2e-16
## Score (logrank) test = 409.6 on 2 df, p=<2e-16
# Cox proportional hazards fit with sex as the only covariate (unadjusted
# sex effect).
summary(
  coxph(
    survobj ~ sex,
    data = datfin
  )
)
## Call:
## coxph(formula = survobj ~ sex, data = datfin)
##
## n= 8779, number of events= 691
##
## coef exp(coef) se(coef) z Pr(>|z|)
## sexMale 0.7516 2.1204 0.1177 6.388 1.68e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## sexMale 2.12 0.4716 1.684 2.67
##
## Concordance= 0.553 (se = 0.006 )
## Likelihood ratio test= 49.22 on 1 df, p=2e-12
## Wald test = 40.81 on 1 df, p=2e-10
## Score (logrank) test = 42.77 on 1 df, p=6e-11
# Cox proportional hazards fit with the sex-by-race interaction, using the
# releveled data so the baseline group is Caucasian males.
summary(
  coxph(
    survobj ~ sex * consolrace,
    data = datfinref
  )
)
## Call:
## coxph(formula = survobj ~ sex * consolrace, data = datfinref)
##
## n= 8779, number of events= 691
##
## coef exp(coef) se(coef) z
## sexFemale -0.736969 0.478562 0.215061 -3.427
## consolraceAfrican-American 0.545479 1.725434 0.094451 5.775
## consolraceOther 0.116449 1.123501 0.176779 0.659
## consolraceHispanic -0.320675 0.725659 0.191190 -1.677
## sexFemale:consolraceAfrican-American 0.005916 1.005934 0.262537 0.023
## sexFemale:consolraceOther -0.796556 0.450879 0.755814 -1.054
## sexFemale:consolraceHispanic 0.320587 1.377936 0.525888 0.610
## Pr(>|z|)
## sexFemale 0.000611 ***
## consolraceAfrican-American 7.68e-09 ***
## consolraceOther 0.510071
## consolraceHispanic 0.093492 .
## sexFemale:consolraceAfrican-American 0.982022
## sexFemale:consolraceOther 0.291927
## sexFemale:consolraceHispanic 0.542120
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## sexFemale 0.4786 2.0896 0.3140 0.7295
## consolraceAfrican-American 1.7254 0.5796 1.4338 2.0763
## consolraceOther 1.1235 0.8901 0.7945 1.5887
## consolraceHispanic 0.7257 1.3781 0.4989 1.0555
## sexFemale:consolraceAfrican-American 1.0059 0.9941 0.6013 1.6828
## sexFemale:consolraceOther 0.4509 2.2179 0.1025 1.9834
## sexFemale:consolraceHispanic 1.3779 0.7257 0.4916 3.8625
##
## Concordance= 0.613 (se = 0.01 )
## Likelihood ratio test= 111.6 on 7 df, p=<2e-16
## Wald test = 98.28 on 7 df, p=<2e-16
## Score (logrank) test = 106.4 on 7 df, p=<2e-16
# Cox proportional hazards fit: sex-by-race interaction adjusted for the
# violent decile score (baseline group: Caucasian males).
summary(
  coxph(
    survobj ~ sex * consolrace + v_decile_score,
    data = datfinref
  )
)
## Call:
## coxph(formula = survobj ~ sex * consolrace + v_decile_score,
## data = datfinref)
##
## n= 8779, number of events= 691
##
## coef exp(coef) se(coef) z
## sexFemale -0.61378 0.54130 0.21531 -2.851
## consolraceAfrican-American 0.18189 1.19948 0.09730 1.869
## consolraceOther 0.20257 1.22454 0.17690 1.145
## consolraceHispanic -0.35861 0.69864 0.19122 -1.875
## v_decile_score 0.24294 1.27499 0.01465 16.578
## sexFemale:consolraceAfrican-American 0.09983 1.10499 0.26259 0.380
## sexFemale:consolraceOther -0.96178 0.38221 0.75597 -1.272
## sexFemale:consolraceHispanic 0.42991 1.53712 0.52599 0.817
## Pr(>|z|)
## sexFemale 0.00436 **
## consolraceAfrican-American 0.06158 .
## consolraceOther 0.25218
## consolraceHispanic 0.06073 .
## v_decile_score < 2e-16 ***
## sexFemale:consolraceAfrican-American 0.70381
## sexFemale:consolraceOther 0.20329
## sexFemale:consolraceHispanic 0.41374
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## sexFemale 0.5413 1.8474 0.35495 0.8255
## consolraceAfrican-American 1.1995 0.8337 0.99122 1.4515
## consolraceOther 1.2245 0.8166 0.86575 1.7320
## consolraceHispanic 0.6986 1.4313 0.48028 1.0163
## v_decile_score 1.2750 0.7843 1.23889 1.3121
## sexFemale:consolraceAfrican-American 1.1050 0.9050 0.66045 1.8487
## sexFemale:consolraceOther 0.3822 2.6163 0.08686 1.6818
## sexFemale:consolraceHispanic 1.5371 0.6506 0.54826 4.3095
##
## Concordance= 0.708 (se = 0.01 )
## Likelihood ratio test= 371.4 on 8 df, p=<2e-16
## Wald test = 381.6 on 8 df, p=<2e-16
## Score (logrank) test = 421.9 on 8 df, p=<2e-16
# Cox proportional hazards fit: sex-by-race interaction adjusted for the
# violent decile score, age, and prior offense count (baseline group:
# Caucasian males).
summary(
  coxph(
    survobj ~ sex * consolrace + v_decile_score + age + priors_count.1,
    data = datfinref
  )
)
## Call:
## coxph(formula = survobj ~ sex * consolrace + v_decile_score +
## age + priors_count.1, data = datfinref)
##
## n= 8779, number of events= 691
##
## coef exp(coef) se(coef) z
## sexFemale -0.581090 0.559288 0.215458 -2.697
## consolraceAfrican-American 0.069650 1.072133 0.098305 0.709
## consolraceOther 0.219631 1.245618 0.177195 1.239
## consolraceHispanic -0.367113 0.692731 0.191378 -1.918
## v_decile_score 0.196476 1.217106 0.018453 10.647
## age -0.011681 0.988387 0.004735 -2.467
## priors_count.1 0.064594 1.066726 0.007056 9.155
## sexFemale:consolraceAfrican-American 0.160350 1.173921 0.262845 0.610
## sexFemale:consolraceOther -0.942704 0.389573 0.755954 -1.247
## sexFemale:consolraceHispanic 0.424874 1.529398 0.526022 0.808
## Pr(>|z|)
## sexFemale 0.0070 **
## consolraceAfrican-American 0.4786
## consolraceOther 0.2152
## consolraceHispanic 0.0551 .
## v_decile_score <2e-16 ***
## age 0.0136 *
## priors_count.1 <2e-16 ***
## sexFemale:consolraceAfrican-American 0.5418
## sexFemale:consolraceOther 0.2124
## sexFemale:consolraceHispanic 0.4193
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## sexFemale 0.5593 1.7880 0.36664 0.8532
## consolraceAfrican-American 1.0721 0.9327 0.88424 1.2999
## consolraceOther 1.2456 0.8028 0.88015 1.7628
## consolraceHispanic 0.6927 1.4436 0.47606 1.0080
## v_decile_score 1.2171 0.8216 1.17387 1.2619
## age 0.9884 1.0117 0.97926 0.9976
## priors_count.1 1.0667 0.9374 1.05208 1.0816
## sexFemale:consolraceAfrican-American 1.1739 0.8518 0.70130 1.9650
## sexFemale:consolraceOther 0.3896 2.5669 0.08854 1.7142
## sexFemale:consolraceHispanic 1.5294 0.6539 0.54547 4.2882
##
## Concordance= 0.726 (se = 0.01 )
## Likelihood ratio test= 444.3 on 10 df, p=<2e-16
## Wald test = 482.4 on 10 df, p=<2e-16
## Score (logrank) test = 543.5 on 10 df, p=<2e-16
# Distribution of the violent decile score within each sex/race group, drawn
# once with base graphics and once with ggplot2 (x labels rotated 45 degrees).
boxplot(datfin$v_decile_score ~ datfin$sexrace)
ggplot(datfin, aes(x = sexrace, y = v_decile_score)) +
  geom_boxplot() +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  )

# Three-way count table: sex/race group x violent recidivism flag x violent
# risk category, printed flat with ftable().
mytable <- table(
  datfin[["sexrace"]],
  datfin[["is_violent_recid"]],
  datfin[["v_score_text"]]
)
ftable(mytable)
## Low Medium High
##
## Female African-American 0 545 177 47
## 1 31 14 5
## Female Caucasian 0 611 89 16
## 1 16 7 2
## Female Hispanic 0 111 13 0
## 1 5 0 0
## Female Other 0 85 16 2
## 1 2 0 0
## Male African-American 0 1802 928 457
## 1 134 142 100
## Male Caucasian 0 1689 350 101
## 1 100 42 18
## Male Hispanic 0 447 115 37
## 1 20 7 6
## Male Other 0 382 56 12
## 1 17 12 11
library(cutpointr)
# Confusion-matrix counts per sex/race group (two sexes x four race groups,
# females first). The counts are entered by hand.
pred <- data.frame(
  SexRace = paste(
    rep(c("Female", "Male"), each = 4),
    rep(c("African American", "Caucasian", "Hispanic", "Other"), times = 2)
  ),
  TP = c(19, 9, 0, 0, 242, 60, 13, 23),
  FP = c(224, 105, 13, 18, 1385, 451, 152, 68),
  TN = c(545, 611, 111, 85, 1802, 1689, 447, 382),
  FN = c(31, 16, 5, 2, 134, 100, 20, 17)
)
pred
## SexRace TP FP TN FN
## 1 Female African American 19 224 545 31
## 2 Female Caucasian 9 105 611 16
## 3 Female Hispanic 0 13 111 5
## 4 Female Other 0 18 85 2
## 5 Male African American 242 1385 1802 134
## 6 Male Caucasian 60 451 1689 100
## 7 Male Hispanic 13 152 447 20
## 8 Male Other 23 68 382 17
# Per-group classification metrics derived from the confusion-matrix counts
# in `pred`. The cutpointr helpers take their counts in a fixed order:
#   tpr(tp, fn), fnr(tp, fn), fpr(fp, tn), tnr(fp, tn)
# The true positive rate must therefore be given the false negatives. Passing
# the false positives instead yields TP / (TP + FP), i.e. the PPV as a
# percentage, and tpr + fnr would no longer sum to 100 within a group.
# NOTE: any previously rendered tables of pred2 predate this correction and
# need to be regenerated.
truepos <- round(tpr(pred$TP, pred$FN) * 100, digits = 2)
falsepos <- round(fpr(pred$FP, pred$TN) * 100, digits = 2)
trueneg <- round(tnr(pred$FP, pred$TN) * 100, digits = 2)
falseneg <- round(fnr(pred$TP, pred$FN) * 100, digits = 2)

# Predictive values are kept as proportions (0-1), unlike the four rates
# above, which are percentages.
PPV <- round(pred$TP / (pred$TP + pred$FP), digits = 3)
NPV <- round(pred$TN / (pred$TN + pred$FN), digits = 3)

# One row per sex/race group; column order: tpr, tnr, fpr, fnr, PPV, NPV.
pred2 <- data.frame(pred$SexRace, truepos, trueneg, falsepos, falseneg, PPV, NPV)
pred2
## pred.SexRace tpr tnr fpr fnr PPV NPV
## 1 Female African American 7.82 70.87 29.13 62.00 0.078 0.946
## 2 Female Caucasian 7.89 85.34 14.66 64.00 0.079 0.974
## 3 Female Hispanic 0.00 89.52 10.48 100.00 0.000 0.957
## 4 Female Other 0.00 82.52 17.48 100.00 0.000 0.977
## 5 Male African American 14.87 56.54 43.46 35.64 0.149 0.931
## 6 Male Caucasian 11.74 78.93 21.07 62.50 0.117 0.944
## 7 Male Hispanic 7.88 74.62 25.38 60.61 0.079 0.957
## 8 Male Other 25.27 84.89 15.11 42.50 0.253 0.957
# Render pred2 as a captioned table via knitr.
knitr::kable(
  x = pred2,
  caption = "Predictive Values"
)
| pred.SexRace | tpr | tnr | fpr | fnr | PPV | NPV |
|---|---|---|---|---|---|---|
| Female African American | 7.82 | 70.87 | 29.13 | 62.00 | 0.078 | 0.946 |
| Female Caucasian | 7.89 | 85.34 | 14.66 | 64.00 | 0.079 | 0.974 |
| Female Hispanic | 0.00 | 89.52 | 10.48 | 100.00 | 0.000 | 0.957 |
| Female Other | 0.00 | 82.52 | 17.48 | 100.00 | 0.000 | 0.977 |
| Male African American | 14.87 | 56.54 | 43.46 | 35.64 | 0.149 | 0.931 |
| Male Caucasian | 11.74 | 78.93 | 21.07 | 62.50 | 0.117 | 0.944 |
| Male Hispanic | 7.88 | 74.62 | 25.38 | 60.61 | 0.079 | 0.957 |
| Male Other | 25.27 | 84.89 | 15.11 | 42.50 | 0.253 | 0.957 |
library(kableExtra)
# Same table rendered through kableExtra's kbl() with default settings.
kbl(x = pred2)
| pred.SexRace | tpr | tnr | fpr | fnr | PPV | NPV |
|---|---|---|---|---|---|---|
| Female African American | 7.82 | 70.87 | 29.13 | 62.00 | 0.078 | 0.946 |
| Female Caucasian | 7.89 | 85.34 | 14.66 | 64.00 | 0.079 | 0.974 |
| Female Hispanic | 0.00 | 89.52 | 10.48 | 100.00 | 0.000 | 0.957 |
| Female Other | 0.00 | 82.52 | 17.48 | 100.00 | 0.000 | 0.977 |
| Male African American | 14.87 | 56.54 | 43.46 | 35.64 | 0.149 | 0.931 |
| Male Caucasian | 11.74 | 78.93 | 21.07 | 62.50 | 0.117 | 0.944 |
| Male Hispanic | 7.88 | 74.62 | 25.38 | 60.61 | 0.079 | 0.957 |
| Male Other | 25.27 | 84.89 | 15.11 | 42.50 | 0.253 | 0.957 |
# Styled kableExtra table of pred2 with a descriptive header row added above
# the column names; each label spans exactly one column.
add_header_above(
  kable_styling(kbl(pred2)),
  c(
    "Sex/Race" = 1,
    "True Positive Rate" = 1,
    "True Negative Rate" = 1,
    "False Positive Rate" = 1,
    "False Negative Rate" = 1,
    "Positive Predictive Value" = 1,
    "Negative Predictive Value" = 1
  )
)
| pred.SexRace | tpr | tnr | fpr | fnr | PPV | NPV |
|---|---|---|---|---|---|---|
| Female African American | 7.82 | 70.87 | 29.13 | 62.00 | 0.078 | 0.946 |
| Female Caucasian | 7.89 | 85.34 | 14.66 | 64.00 | 0.079 | 0.974 |
| Female Hispanic | 0.00 | 89.52 | 10.48 | 100.00 | 0.000 | 0.957 |
| Female Other | 0.00 | 82.52 | 17.48 | 100.00 | 0.000 | 0.977 |
| Male African American | 14.87 | 56.54 | 43.46 | 35.64 | 0.149 | 0.931 |
| Male Caucasian | 11.74 | 78.93 | 21.07 | 62.50 | 0.117 | 0.944 |
| Male Hispanic | 7.88 | 74.62 | 25.38 | 60.61 | 0.079 | 0.957 |
| Male Other | 25.27 | 84.89 | 15.11 | 42.50 | 0.253 | 0.957 |