Sungji Peter Shin
3.31.2019
The original dataset contains offender-level data on admissions to and releases from state prison, post-confinement community supervision, and year-end prison custody records. The purpose of the collection is “to monitor the nation’s correctional population and address specific policy questions related to recidivism, prisoner reentry, and trends in demographic characteristics of the incarcerated and community supervision populations” (United States Department of Justice, 2018). The data are collected annually, and participation in the data collection is voluntary, so “not all states participate, and not all states have participated for each year” (United States Department of Justice, 2018). The full dataset contains 14,015,190 cases and 18 variables; however, the scope of this analysis is limited to the 511,253 cases that represent inmates who were released in 2000.
library(sjmisc)
library(tibble)
library(dplyr)
library(Zelig)
library(tibble)
library(tidyverse)
library(tidyr)
library(texreg)
library(pander)
library(data.table)
require(data.table)
# Read the tab-separated NCRP extract directly into a plain data.frame
# (fread returns a data.table unless told otherwise), then preview the
# first rows.
prison <- fread(
  "C:/Users/jw/Desktop/ICPSR_36862/DS0003/36862-0003-Data.tsv",
  data.table = FALSE
)
head(prison)
## SEX RPTYEAR STATE EDUCATION ADMTYPE OFFGENERAL MAND_PRISREL_YEAR
## 1 1 1991 1 3 1 1 NA
## 2 1 1991 1 2 1 1 NA
## 3 1 1991 1 2 1 3 NA
## 4 1 1991 1 2 1 2 NA
## 5 1 1991 1 3 1 4 NA
## 6 2 1991 1 1 1 3 NA
## PROJ_PRISREL_YEAR PARELIG_YEAR ADMITYR SENTLGTH OFFDETAIL RACE AGEADMIT
## 1 NA NA 1983 5 1 1 2
## 2 NA NA 1987 4 4 1 2
## 3 NA NA 1987 4 12 1 4
## 4 NA NA 1989 4 7 2 3
## 5 NA NA 1989 3 13 9 2
## 6 NA NA 1991 0 12 2 2
## RELYR RELTYPE AGERLSE TIMESRVD_REL
## 1 1991 1 3 3
## 2 1991 2 2 2
## 3 1991 3 4 2
## 4 1991 2 3 2
## 5 1991 2 2 2
## 6 1991 1 2 0
# Keep only the records whose reporting year (RPTYEAR) is 2000, then report
# the number of rows and columns in the subset.
prison2000 <- prison %>%
  filter(RPTYEAR == 2000)
dim(prison2000)
## [1] 511253 18
# Data preparation ----

# Give the NCRP columns used in this analysis descriptive snake_case names.
prison2000 <- rename(
  prison2000,
  sex = SEX,
  year_reported = RPTYEAR,
  release_type = RELTYPE,
  offense_detail = OFFDETAIL,
  state = STATE,
  education_lvl = EDUCATION,
  admit_type = ADMTYPE,
  offense = OFFGENERAL,
  admit_year = ADMITYR,
  max_sentence = SENTLGTH,
  race = RACE,
  age_at_admit = AGEADMIT,
  released_year = RELYR,
  age_at_release = AGERLSE,
  time_served = TIMESRVD_REL
)

# Keep only the variables of interest.
prison2000 <- prison2000 %>%
  select(
    year_reported, state, sex, education_lvl, race, age_at_admit,
    age_at_release, time_served, admit_type, release_type, offense,
    offense_detail, max_sentence, admit_year, released_year
  )

# Convert the integer-coded categorical variables into factors.
prison2000 <- prison2000 %>%
  mutate(
    state = as.factor(state),
    sex = as.factor(sex),
    education_lvl = as.factor(education_lvl),
    race = as.factor(race),
    age_at_admit = as.factor(age_at_admit),
    age_at_release = as.factor(age_at_release),
    time_served = as.factor(time_served),
    admit_type = as.factor(admit_type),
    release_type = as.factor(release_type),
    offense = as.factor(offense),
    offense_detail = as.factor(offense_detail),
    max_sentence = as.factor(max_sentence)
  )

# Shift the sentence-length codes 0-6 up to 1-7; code 9 is left as 9 here and
# turned into NA further down.
prison2000 <- prison2000 %>%
  mutate(
    max_sentence = sjmisc::rec(
      max_sentence,
      rec = '0=1; 1=2; 2=3; 3=4; 4=5; 5=6; 6=7; 9=9'
    )
  ) %>%
  mutate(max_sentence = as.factor(max_sentence))

# Elapsed-time variable (currently disabled).
# The two NA recodes for `time` are disabled together with the derivation.
# Run on their own they do not fail: with no `time` column present,
# `prison2000$time` resolves by partial matching to `time_served`, and the
# assignment silently adds a stray copy of that factor to the data.
# prison2000 <- prison2000 %>%
#   mutate(time = released_year - admit_year)
# table(prison2000$time)
# prison2000$time[prison2000$time == -7999] <- NA
# prison2000$time[prison2000$time == 0] <- NA

# Recode the placeholder codes ('9' / '99') to missing.
prison2000$education_lvl[prison2000$education_lvl == '99'] <- NA
prison2000$race[prison2000$race == '9'] <- NA
prison2000$age_at_admit[prison2000$age_at_admit == '9'] <- NA
prison2000$age_at_release[prison2000$age_at_release == '9'] <- NA
prison2000$time_served[prison2000$time_served == '9'] <- NA
prison2000$admit_type[prison2000$admit_type == '9'] <- NA
prison2000$release_type[prison2000$release_type == '9'] <- NA
prison2000$offense[prison2000$offense == '9'] <- NA
prison2000$offense_detail[prison2000$offense_detail == '99'] <- NA
prison2000$max_sentence[prison2000$max_sentence == '9'] <- NA

# Human-readable labels for the coded factors.
# The result is stored in a separate copy instead of being left unassigned
# (which discarded the labels and auto-printed the entire data frame).
# `prison2000` itself deliberately keeps the numeric codes, because the
# modelling code refers to them (ordered levels '1'-'7', `sex = '1'`, ...).
prison2000_labelled <- prison2000 %>%
  mutate(
    state = fct_recode(
      state,
      'AL' = '1', 'AK' = '2', 'AZ' = '4', 'AR' = '5', 'CA' = '6', 'CO' = '8',
      'CT' = '9', 'DE' = '10', 'DC' = '11', 'FL' = '12', 'GA' = '13',
      'HI' = '15', 'ID' = '16', 'IL' = '17', 'IN' = '18', 'IA' = '19',
      'KS' = '20', 'KY' = '21', 'LA' = '22', 'ME' = '23', 'MD' = '24',
      'MA' = '25', 'MI' = '26', 'MN' = '27', 'MS' = '28', 'MO' = '29',
      'MT' = '30', 'NE' = '31', 'NV' = '32', 'NH' = '33', 'NJ' = '34',
      'NM' = '35', 'NY' = '36', 'NC' = '37', 'ND' = '38', 'OH' = '39',
      'OK' = '40', 'OR' = '41', 'PA' = '42', 'RI' = '44', 'SC' = '45',
      'SD' = '46', 'TN' = '47', 'TX' = '48', 'UT' = '49', 'VT' = '50',
      'VA' = '51', 'WA' = '53', 'WV' = '54', 'WI' = '55', 'WY' = '56'
    ),
    sex = fct_recode(sex, 'male' = '1', 'female' = '2'),
    education_lvl = fct_recode(
      education_lvl,
      'less_than_high_school' = '1', 'high_school_diploma/GED' = '2',
      'any_college' = '3', 'unknown' = '99'
    ),
    race = fct_recode(
      race,
      'White_non_hispanic' = '1', 'Black_non_hispanic' = '2',
      'Hispanic' = '3', 'Other' = '4', 'Missing' = '9'
    ),
    age_at_admit = fct_recode(
      age_at_admit,
      '18-24' = '1', '25-34' = '2', '35-44' = '3', '45-54' = '4',
      'older than 54' = '5', 'Missing' = '9'
    ),
    age_at_release = fct_recode(
      age_at_release,
      '18-24' = '1', '25-34' = '2', '35-44' = '3', '45-54' = '4',
      'older than 54' = '5', 'Missing' = '9'
    ),
    time_served = fct_recode(
      time_served,
      'less than 1' = '0', '1-1.9' = '1', '2-4.9' = '2', '5-9.9' = '3',
      'above 10' = '4', 'Missing' = '9'
    ),
    admit_type = fct_recode(
      admit_type,
      'new_court_commitment' = '1', 'parole_return/revocation' = '2',
      'other_admission' = '3', 'Missing' = '9'
    ),
    release_type = fct_recode(
      release_type,
      'conditional_release' = '1', 'unconditional_release' = '2',
      'other_release' = '3', 'Missing' = '9'
    ),
    offense = fct_recode(
      offense,
      'violent' = '1', 'property' = '2', 'drugs' = '3', 'public_order' = '4',
      'other' = '5', 'Missing' = '9'
    ),
    offense_detail = fct_recode(
      offense_detail,
      'Murder' = '1', 'Negligent_manslaughter' = '2',
      'Rape/sexual_assault' = '3', 'Robbery' = '4',
      'Aggravated/simple_assault' = '5', 'Other_violent' = '6',
      'Burglary' = '7', 'Larceny' = '8', 'Motor_vehicle_theft' = '9',
      'Fraud' = '10', 'Other_property' = '11', 'Drugs' = '12',
      'Public_order' = '13', 'Other' = '14', 'Missing' = '99'
    ),
    max_sentence = fct_recode(
      max_sentence,
      'less than 1' = '1', '1-1.9' = '2', '2-4.9' = '3', '5-9.9' = '4',
      '10-24.9' = '5', 'above 25' = '6', 'Life' = '7', 'Missing' = '9'
    )
  )

# Preview the labelled copy and the coded data used for modelling.
head(prison2000_labelled)
head(prison2000)
The dependent variable is the maximum sentence length for each inmate, ordered from less than 1 year (the least severe) to a life sentence (the most severe). The independent variables of interest include the type of offense (violent, property, drugs, etc.), age at admission to the facility, race, education level, gender, and type of admission (new court commitment, parole return/revocation, or other admission, including unsentenced, transfer, and AWOL/escape return). Among the many possible hypotheses, I particularly expect to find gender inequality when other factors are held constant.
library(zeligverse)
## + Zelig 5.1.6 Date: 2019-03-31
## + ZeligChoice 0.9.6 R: 3.5.2
## + ZeligEI 0.1.2 OS: Windows 7 x64 SP 1
## + Amelia 1.7.5 GUI: RTerm
## + MatchIt 3.0.2 Locale: Korean_Korea.949
## + WhatIf 1.5.9 TZ: America/New_York
# Turn max_sentence into an ordered factor running from '1' to '7'.
# Any value whose level is not listed here becomes NA.
prison2000$max_sentence <- factor(
  prison2000$max_sentence,
  levels = as.character(1:7),
  ordered = TRUE
)
# Frequency of each sentence-length category.
table(prison2000$max_sentence)
##
## 1 2 3 4 5 6 7
## 53463 55560 208622 115185 57670 13058 2702
# Nested ordered-logit models of maximum sentence length ----
# Each model adds terms to the previous one. An earlier intermediate fit
# (Model 4 + sex only) was assigned to `z5` and immediately overwritten by the
# final specification, so it has been dropped: it never reached the table and
# only cost an extra fit on the full data.
z1 <- zelig(
  max_sentence ~ offense,
  model = "ologit", data = prison2000, cite = FALSE
)
z2 <- zelig(
  max_sentence ~ offense * age_at_admit,
  model = "ologit", data = prison2000, cite = FALSE
)
z3 <- zelig(
  max_sentence ~ offense * age_at_admit + race,
  model = "ologit", data = prison2000, cite = FALSE
)
z4 <- zelig(
  max_sentence ~ offense * age_at_admit + race * education_lvl,
  model = "ologit", data = prison2000, cite = FALSE
)
z5 <- zelig(
  max_sentence ~ offense * age_at_admit + race * education_lvl + sex +
    admit_type,
  model = "ologit", data = prison2000, cite = FALSE
)

# Side-by-side coefficient table for the five models, rendered via pander.
table1 <- htmlreg(list(z1, z2, z3, z4, z5), digits = 3)
pander(table1)
| Model 1 | Model 2 | Model 3 | Model 4 | Model 5 | ||
|---|---|---|---|---|---|---|
| offense2 | -0.906*** | -0.866*** | -0.940*** | -0.917*** | -1.022*** | |
| (0.007) | (0.013) | (0.015) | (0.018) | (0.019) | ||
| offense3 | -0.829*** | -0.793*** | -0.781*** | -0.744*** | -0.850*** | |
| (0.007) | (0.013) | (0.015) | (0.019) | (0.020) | ||
| offense4 | -1.701*** | -1.666*** | -1.364*** | -1.372*** | -1.491*** | |
| (0.009) | (0.020) | (0.023) | (0.027) | (0.029) | ||
| offense5 | -0.714*** | -0.657*** | -0.804*** | -0.493*** | -0.518*** | |
| (0.042) | (0.079) | (0.086) | (0.099) | (0.099) | ||
| age_at_admit2 | 0.061*** | 0.091*** | 0.161*** | 0.063** | ||
| (0.013) | (0.014) | (0.019) | (0.020) | |||
| age_at_admit3 | -0.069*** | -0.005 | 0.078*** | -0.002 | ||
| (0.015) | (0.016) | (0.022) | (0.023) | |||
| age_at_admit4 | 0.087*** | 0.181*** | 0.266*** | 0.231*** | ||
| (0.023) | (0.026) | (0.036) | (0.038) | |||
| age_at_admit5 | 0.445*** | 0.583*** | 0.493*** | 0.513*** | ||
| (0.040) | (0.044) | (0.060) | (0.062) | |||
| offense2:age_at_admit2 | 0.018 | 0.024 | 0.028 | 0.104*** | ||
| (0.018) | (0.020) | (0.025) | (0.027) | |||
| offense3:age_at_admit2 | 0.026 | 0.042* | 0.096*** | 0.110*** | ||
| (0.018) | (0.020) | (0.025) | (0.027) | |||
| offense4:age_at_admit2 | -0.029 | -0.231*** | -0.334*** | -0.278*** | ||
| (0.025) | (0.029) | (0.035) | (0.037) | |||
| offense5:age_at_admit2 | -0.034 | 0.090 | 0.284 | 0.412** | ||
| (0.108) | (0.117) | (0.151) | (0.151) | |||
| offense2:age_at_admit3 | -0.035 | -0.052* | -0.055 | 0.014 | ||
| (0.020) | (0.022) | (0.029) | (0.030) | |||
| offense3:age_at_admit3 | -0.039* | -0.064** | 0.035 | 0.050 | ||
| (0.019) | (0.021) | (0.028) | (0.030) | |||
| offense4:age_at_admit3 | 0.002 | -0.232*** | -0.340*** | -0.288*** | ||
| (0.026) | (0.030) | (0.037) | (0.039) | |||
| offense5:age_at_admit3 | -0.038 | 0.012 | 0.225 | 0.304 | ||
| (0.112) | (0.121) | (0.165) | (0.165) | |||
| offense2:age_at_admit4 | -0.290*** | -0.329*** | -0.297*** | -0.254*** | ||
| (0.031) | (0.034) | (0.048) | (0.050) | |||
| offense3:age_at_admit4 | -0.248*** | -0.332*** | -0.206*** | -0.221*** | ||
| (0.029) | (0.032) | (0.046) | (0.048) | |||
| offense4:age_at_admit4 | -0.206*** | -0.433*** | -0.561*** | -0.557*** | ||
| (0.037) | (0.042) | (0.054) | (0.058) | |||
| offense5:age_at_admit4 | -0.355* | -0.536** | 0.129 | 0.190 | ||
| (0.166) | (0.178) | (0.285) | (0.287) | |||
| offense2:age_at_admit5 | -0.612*** | -0.663*** | -0.430*** | -0.422*** | ||
| (0.061) | (0.067) | (0.097) | (0.100) | |||
| offense3:age_at_admit5 | -0.424*** | -0.559*** | -0.146 | -0.174 | ||
| (0.056) | (0.061) | (0.089) | (0.094) | |||
| offense4:age_at_admit5 | -0.509*** | -0.732*** | -0.669*** | -0.725*** | ||
| (0.062) | (0.071) | (0.091) | (0.098) | |||
| offense5:age_at_admit5 | -0.252 | -0.312 | 0.211 | 0.243 | ||
| (0.313) | (0.330) | (0.451) | (0.447) | |||
| race2 | 0.100*** | 0.016 | -0.063*** | |||
| (0.007) | (0.012) | (0.012) | ||||
| race3 | -0.084*** | 0.159*** | 0.006 | |||
| (0.008) | (0.016) | (0.018) | ||||
| race4 | -0.348*** | -0.415*** | -0.396*** | |||
| (0.024) | (0.042) | (0.043) | ||||
| education_lvl2 | 0.156*** | 0.100*** | ||||
| (0.013) | (0.014) | |||||
| education_lvl3 | 0.065** | 0.066** | ||||
| (0.023) | (0.024) | |||||
| race2:education_lvl2 | 0.001 | 0.022 | ||||
| (0.018) | (0.019) | |||||
| race3:education_lvl2 | -0.042 | -0.050 | ||||
| (0.027) | (0.030) | |||||
| race4:education_lvl2 | -0.331*** | -0.356*** | ||||
| (0.064) | (0.065) | |||||
| race2:education_lvl3 | -0.138*** | -0.110*** | ||||
| (0.032) | (0.033) | |||||
| race3:education_lvl3 | -0.083 | -0.077 | ||||
| (0.068) | (0.073) | |||||
| race4:education_lvl3 | 0.097 | 0.045 | ||||
| (0.124) | (0.125) | |||||
| sex2 | -0.431*** | |||||
| (0.013) | ||||||
| admit_type2 | 0.714*** | |||||
| (0.010) | ||||||
| admit_type3 | 0.942*** | |||||
| (0.030) | ||||||
| AIC | 1475875.169 | 1447703.100 | 1143010.314 | 691321.405 | 608989.655 | |
| BIC | 1475986.186 | 1448035.622 | 1143370.121 | 691744.319 | 609439.075 | |
| Log Likelihood | -737927.584 | -723821.550 | -571472.157 | -345619.703 | -304450.828 | |
| Deviance | 1475855.169 | 1447643.100 | 1142944.314 | 691239.405 | 608901.655 | |
| Num. obs. | 489790 | 481219 | 401618 | 223009 | 201610 | |
| \*\*\*p < 0.001, \*\*p < 0.01, \*p < 0.05 | ||||||
The first model simply verifies that the more serious/violent the crime an inmate commits, the longer the maximum sentence he/she receives. Among the offense types, a violation of public order leads to the shortest maximum sentence. All coefficients are statistically significant at the 0.001 level. The second model adds an interaction effect between the type of offense and age at admission. Inmates in the other age groups have a higher chance of receiving less severe sentences when they committed a less serious crime such as a property crime, except for the youngest age group (18-24 years); however, this is not statistically significant. In particular, those in age groups 4 (45-54 years) and 5 (55 years and older) who committed property, public-order, or other crimes are less likely to receive the most severe sentence compared to their counterparts who committed a violent crime (at the 0.001 level).
Model 3 adds the race variable; only Black inmates face a higher chance of receiving a more severe sentence compared to White inmates, also at the 0.001 level. Model 4 adds another interaction effect, between race and education level. A higher level of education by itself increases the chance of receiving a more severe sentence at a statistically significant level (though not linearly). Compared to White inmates with the same level of education, inmates of other races with a high school diploma (or GED) and Black inmates with college-level education are statistically less likely to receive a more severe sentence at the 0.001 level. Model 5 (the best-fitting model, with the lowest AIC and BIC values) adds gender and type of admission. Female inmates are statistically less likely to receive a more severe sentence at the 0.001 level, and those who are admitted due to parole return/revocation or other reasons, including escape, are statistically more likely to receive a more severe sentence at the 0.001 level.
# Compare the two sex categories under the final model: code "1" is the
# baseline scenario (x) and code "2" the contrast scenario (x1).
z5$setx(sex = "1")
z5$setx1(sex = "2")
# Simulate quantities of interest for both scenarios.
z5$sim()
# Use small, equal margins on all four sides before drawing the plots.
par(mar = rep(1.8, 4))
z5$graph()
When other features are assigned constant values (i.e. mode or median), the patterns of both predicted and expected values for males and females are similar, but female inmates are slightly more likely to get more lenient sentences while male inmates have slightly higher chance of getting more severe sentences.
# Pull the simulated "fd" quantity of interest (first differences, in Zelig's
# naming) for the x1 scenario and summarise its distribution.
ftd <- z5$get_qi(qi = "fd", xvalue = "x1")
summary(ftd)
## 1 2 3
## Min. :0.05145 Min. :0.02402 Min. :1.753e-06
## 1st Qu.:0.05585 1st Qu.:0.02593 1st Qu.:6.756e-03
## Median :0.05732 Median :0.02653 Median :8.118e-03
## Mean :0.05732 Mean :0.02654 Mean :8.090e-03
## 3rd Qu.:0.05876 3rd Qu.:0.02713 3rd Qu.:9.475e-03
## Max. :0.06395 Max. :0.02977 Max. :1.472e-02
## 4 5 6
## Min. :-0.05805 Min. :-0.03773 Min. :-0.005177
## 1st Qu.:-0.05347 1st Qu.:-0.03413 1st Qu.:-0.004306
## Median :-0.05239 Median :-0.03312 Median :-0.004080
## Mean :-0.05235 Mean :-0.03308 Mean :-0.004075
## 3rd Qu.:-0.05122 3rd Qu.:-0.03202 3rd Qu.:-0.003832
## Max. :-0.04753 Max. :-0.02765 Max. :-0.003036
## 7
## Min. :-0.003437
## 1st Qu.:-0.002620
## Median :-0.002421
## Mean :-0.002431
## 3rd Qu.:-0.002225
## Max. :-0.001643
# Sex contrast within individual offense types ----
# Refit the final specification (same formula as Model 5) so the scenarios
# below start from a freshly estimated object.
z5 <- zelig(
  max_sentence ~ offense * age_at_admit + race * education_lvl + sex +
    admit_type,
  model = "ologit", data = prison2000, cite = FALSE
)

# Offense code "1": sex "1" (x) versus sex "2" (x1).
z5$setx(sex = "1", offense = "1")
z5$setx1(sex = "2", offense = "1")
z5$sim()
par(mar = c(1.8, 1.8, 1.8, 1.8))
z5$graph()

# Offense code "2": sex "1" (x) versus sex "2" (x1).
z5$setx(sex = "1", offense = "2")
z5$setx1(sex = "2", offense = "2")
z5$sim()
par(mar = c(1.8, 1.8, 1.8, 1.8))
z5$graph()

# Offense code "3": sex "1" (x) versus sex "2" (x1).
z5$setx(sex = "1", offense = "3")
z5$setx1(sex = "2", offense = "3")
z5$sim()
par(mar = c(1.8, 1.8, 1.8, 1.8))
z5$graph()
The pattern of gender effects does not seem to vary much across different types of crime. In all three categories of crime, female inmates are slightly more likely to get more lenient sentences while male inmates have a slightly higher chance of getting more severe sentences.
Overall, all of the independent variables of interest, including some significant interaction effects between pairs of them, have an impact on whether an inmate is sentenced to a more lenient or a more severe punishment (maximum sentence length). In particular, the results show that gender inequality exists to some degree, in that female inmates are likely to receive more lenient sentences.
United States Department of Justice. Office of Justice Programs. Bureau of Justice Statistics. National Corrections Reporting Program, 1991-2015; Selected Variables. Ann Arbor, MI: Inter-university Consortium for Political and Social Research [distributor], 2018-03-02. (https://doi.org/10.3886/ICPSR36862.v1)