Sungji Peter Shin
3.31.2019

Analysis of the National Corrections Reporting Program (2000).

The original dataset contains offender-level data on admissions to and releases from state prison, post-confinement community supervision, and year-end prison custody records. The purpose of the collection is “to monitor the nation’s correctional population and address specific policy questions related to recidivism, prisoner reentry, and trends in demographic characteristics of the incarcerated and community supervision populations” (United States Department of Justice, 2018). The data are collected annually and participation in the collection is voluntary, so “not all states participate, and not all states have participated for each year” (United States Department of Justice, 2018). The full dataset contains 14,015,190 cases and 18 variables; however, the scope of this analysis is limited to the 511,253 cases that represent inmates who were released in 2000.

library(sjmisc)
library(tibble)
library(dplyr)
library(Zelig)
library(tibble)
library(tidyverse)
library(tidyr)
library(texreg)
library(pander)
library(data.table)
require(data.table)
# Read the tab-separated NCRP extract straight into a plain data.frame
# (not a data.table) and preview the first rows.
ncrp_path <- "C:/Users/jw/Desktop/ICPSR_36862/DS0003/36862-0003-Data.tsv"
prison <- fread(ncrp_path, data.table = FALSE)
head(prison)
##   SEX RPTYEAR STATE EDUCATION ADMTYPE OFFGENERAL MAND_PRISREL_YEAR
## 1   1    1991     1         3       1          1                NA
## 2   1    1991     1         2       1          1                NA
## 3   1    1991     1         2       1          3                NA
## 4   1    1991     1         2       1          2                NA
## 5   1    1991     1         3       1          4                NA
## 6   2    1991     1         1       1          3                NA
##   PROJ_PRISREL_YEAR PARELIG_YEAR ADMITYR SENTLGTH OFFDETAIL RACE AGEADMIT
## 1                NA           NA    1983        5         1    1        2
## 2                NA           NA    1987        4         4    1        2
## 3                NA           NA    1987        4        12    1        4
## 4                NA           NA    1989        4         7    2        3
## 5                NA           NA    1989        3        13    9        2
## 6                NA           NA    1991        0        12    2        2
##   RELYR RELTYPE AGERLSE TIMESRVD_REL
## 1  1991       1       3            3
## 2  1991       2       2            2
## 3  1991       3       4            2
## 4  1991       2       3            2
## 5  1991       2       2            2
## 6  1991       1       2            0
# Keep only the records whose reporting year is 2000, then check the size of
# the resulting subset (rows, columns).
prison2000 <- prison %>%
  filter(RPTYEAR == 2000)
dim(prison2000)
## [1] 511253     18
# Keep the variables of interest and give them readable names in one step
# (new_name = OLD_NAME); columns that are not listed are dropped.
prison2000 <- prison2000 %>%
  select(
    year_reported = RPTYEAR,
    state = STATE,
    sex = SEX,
    education_lvl = EDUCATION,
    race = RACE,
    age_at_admit = AGEADMIT,
    age_at_release = AGERLSE,
    time_served = TIMESRVD_REL,
    admit_type = ADMTYPE,
    release_type = RELTYPE,
    offense = OFFGENERAL,
    offense_detail = OFFDETAIL,
    max_sentence = SENTLGTH,
    admit_year = ADMITYR,
    released_year = RELYR
  )

# Treat the integer-coded survey variables as categorical factors.
factor_cols <- c(
  "state", "sex", "education_lvl", "race", "age_at_admit", "age_at_release",
  "time_served", "admit_type", "release_type", "offense", "offense_detail",
  "max_sentence"
)
prison2000[factor_cols] <- lapply(prison2000[factor_cols], as.factor)

# Shift the sentence-length codes up by one (0..6 -> 1..7, 9 stays 9) and
# store the recoded values as a factor again.
recoded_sentence <- sjmisc::rec(
  prison2000$max_sentence,
  rec = "0=1; 1=2; 2=3; 3=4; 4=5; 5=6; 6=7; 9=9"
)
prison2000$max_sentence <- as.factor(recoded_sentence)

# creating a new variable (currently disabled)
# prison2000 <- prison2000 %>%
#  mutate(time = released_year - admit_year)
# table(prison2000$time)
#
# The two `time` recodes below depend on the disabled derivation above, so
# they are disabled with it: while `prison2000` has no `time` column, the
# subset-assignment fails ("replacement has 0 rows"). Re-enable all of these
# lines together.
# prison2000$time[prison2000$time == -7999] <- NA
# prison2000$time[prison2000$time == 0] <- NA

# recoding some values to missing
# Each entry maps a column to the code that is turned into NA for it.
missing_codes <- c(
  education_lvl = "99",
  race = "9",
  age_at_admit = "9",
  age_at_release = "9",
  time_served = "9",
  admit_type = "9",
  release_type = "9",
  offense = "9",
  offense_detail = "99",
  max_sentence = "9"
)
for (col in names(missing_codes)) {
  is_missing <- prison2000[[col]] == missing_codes[[col]]
  prison2000[[col]][is_missing] <- NA
}

# Build a human-readable copy of the data in which every factor code is
# replaced by its label (new label = old code).
#
# The result is stored in a separate object instead of being discarded.
# `prison2000` itself deliberately keeps the numeric codes, because the rest
# of the script refers to them directly (e.g. the ordered outcome levels
# '1'..'7' and `setx(sex = '1')`).
prison2000_labelled <- prison2000 %>%
  mutate(
    state = fct_recode(
      state,
      'AL' = '1', 'AK' = '2', 'AZ' = '4', 'AR' = '5', 'CA' = '6', 'CO' = '8',
      'CT' = '9', 'DE' = '10', 'DC' = '11', 'FL' = '12', 'GA' = '13',
      'HI' = '15', 'ID' = '16', 'IL' = '17', 'IN' = '18', 'IA' = '19',
      'KS' = '20', 'KY' = '21', 'LA' = '22', 'ME' = '23', 'MD' = '24',
      'MA' = '25', 'MI' = '26', 'MN' = '27', 'MS' = '28', 'MO' = '29',
      'MT' = '30', 'NE' = '31', 'NV' = '32', 'NH' = '33', 'NJ' = '34',
      'NM' = '35', 'NY' = '36', 'NC' = '37', 'ND' = '38', 'OH' = '39',
      'OK' = '40', 'OR' = '41', 'PA' = '42', 'RI' = '44', 'SC' = '45',
      'SD' = '46', 'TN' = '47', 'TX' = '48', 'UT' = '49', 'VT' = '50',
      'VA' = '51', 'WA' = '53', 'WV' = '54', 'WI' = '55', 'WY' = '56'
    ),
    sex = fct_recode(sex, 'male' = '1', 'female' = '2'),
    education_lvl = fct_recode(
      education_lvl,
      'less_than_high_school' = '1', 'high_school_diploma/GED' = '2',
      'any_college' = '3', 'unknown' = '99'
    ),
    race = fct_recode(
      race,
      'White_non_hispanic' = '1', 'Black_non_hispanic' = '2',
      'Hispanic' = '3', 'Other' = '4', 'Missing' = '9'
    ),
    age_at_admit = fct_recode(
      age_at_admit,
      '18-24' = '1', '25-34' = '2', '35-44' = '3', '45-54' = '4',
      'older than 54' = '5', 'Missing' = '9'
    ),
    age_at_release = fct_recode(
      age_at_release,
      '18-24' = '1', '25-34' = '2', '35-44' = '3', '45-54' = '4',
      'older than 54' = '5', 'Missing' = '9'
    ),
    time_served = fct_recode(
      time_served,
      'less than 1' = '0', '1-1.9' = '1', '2-4.9' = '2', '5-9.9' = '3',
      'above 10' = '4', 'Missing' = '9'
    ),
    admit_type = fct_recode(
      admit_type,
      'new_court_commitment' = '1', 'parole_return/revocation' = '2',
      'other_admission' = '3', 'Missing' = '9'
    ),
    release_type = fct_recode(
      release_type,
      'conditional_release' = '1', 'unconditional_release' = '2',
      'other_release' = '3', 'Missing' = '9'
    ),
    offense = fct_recode(
      offense,
      'violent' = '1', 'property' = '2', 'drugs' = '3', 'public_order' = '4',
      'other' = '5', 'Missing' = '9'
    ),
    offense_detail = fct_recode(
      offense_detail,
      'Murder' = '1', 'Negligent_manslaughter' = '2',
      'Rape/sexual_assault' = '3', 'Robbery' = '4',
      'Aggravated/simple_assault' = '5', 'Other_violent' = '6',
      'Burglary' = '7', 'Larceny' = '8', 'Motor_vehicle_theft' = '9',
      'Fraud' = '10', 'Other_property' = '11', 'Drugs' = '12',
      'Public_order' = '13', 'Other' = '14', 'Missing' = '99'
    ),
    max_sentence = fct_recode(
      max_sentence,
      'less than 1' = '1', '1-1.9' = '2', '2-4.9' = '3', '5-9.9' = '4',
      '10-24.9' = '5', 'above 25' = '6', 'Life' = '7', 'Missing' = '9'
    )
  )

# Preview the labelled copy.
head(prison2000_labelled)

Variables of interest:

The dependent variable is the maximum sentence length for each inmate, ordered from less than 1 year (the least severe) to a life sentence (the most severe). The independent variables of interest are the type of offense (violent, property, drugs, etc.), age at admission to the facility, race, education level, gender, and type of admission (new court commitment, parole return/revocation, or other admission, including unsentenced, transfer, and AWOL/escape return). Among many possible hypotheses, I particularly expect to find gender inequality in sentencing when other factors are held constant.

library(zeligverse)
## + Zelig       5.1.6        Date: 2019-03-31
## + ZeligChoice 0.9.6           R: 3.5.2
## + ZeligEI     0.1.2          OS: Windows 7 x64 SP 1
## + Amelia      1.7.5         GUI: RTerm
## + MatchIt     3.0.2      Locale: Korean_Korea.949
## + WhatIf      1.5.9          TZ: America/New_York
# Declare the outcome as an ordered factor over the codes '1' < ... < '7',
# then tabulate how many cases fall in each category.
prison2000$max_sentence <- ordered(
  prison2000$max_sentence,
  levels = as.character(1:7)
)
table(prison2000$max_sentence)
## 
##      1      2      3      4      5      6      7 
##  53463  55560 208622 115185  57670  13058   2702
# Nested ordered-logit models of maximum sentence length. Each model adds
# terms to the previous one:
#   z1: offense type
#   z2: + offense x age at admission
#   z3: + race
#   z4: + race x education level
#   z5: + sex and admission type
# Only one z5 is fitted. An intermediate "sex only" fit used to be assigned
# to z5 and was immediately overwritten by the model below, so it never
# reached the table.
z1 <- zelig(
  max_sentence ~ offense,
  model = "ologit", data = prison2000, cite = FALSE
)
z2 <- zelig(
  max_sentence ~ offense * age_at_admit,
  model = "ologit", data = prison2000, cite = FALSE
)
z3 <- zelig(
  max_sentence ~ offense * age_at_admit + race,
  model = "ologit", data = prison2000, cite = FALSE
)
z4 <- zelig(
  max_sentence ~ offense * age_at_admit + race * education_lvl,
  model = "ologit", data = prison2000, cite = FALSE
)
z5 <- zelig(
  max_sentence ~ offense * age_at_admit + race * education_lvl + sex +
    admit_type,
  model = "ologit", data = prison2000, cite = FALSE
)

# Side-by-side coefficient table for the five models.
table1 <- htmlreg(list(z1, z2, z3, z4, z5), digits = 3)
pander(table1)
Statistical models
Model 1 Model 2 Model 3 Model 4 Model 5
offense2 -0.906*** -0.866*** -0.940*** -0.917*** -1.022***
(0.007) (0.013) (0.015) (0.018) (0.019)
offense3 -0.829*** -0.793*** -0.781*** -0.744*** -0.850***
(0.007) (0.013) (0.015) (0.019) (0.020)
offense4 -1.701*** -1.666*** -1.364*** -1.372*** -1.491***
(0.009) (0.020) (0.023) (0.027) (0.029)
offense5 -0.714*** -0.657*** -0.804*** -0.493*** -0.518***
(0.042) (0.079) (0.086) (0.099) (0.099)
age_at_admit2 0.061*** 0.091*** 0.161*** 0.063**
(0.013) (0.014) (0.019) (0.020)
age_at_admit3 -0.069*** -0.005 0.078*** -0.002
(0.015) (0.016) (0.022) (0.023)
age_at_admit4 0.087*** 0.181*** 0.266*** 0.231***
(0.023) (0.026) (0.036) (0.038)
age_at_admit5 0.445*** 0.583*** 0.493*** 0.513***
(0.040) (0.044) (0.060) (0.062)
offense2:age_at_admit2 0.018 0.024 0.028 0.104***
(0.018) (0.020) (0.025) (0.027)
offense3:age_at_admit2 0.026 0.042* 0.096*** 0.110***
(0.018) (0.020) (0.025) (0.027)
offense4:age_at_admit2 -0.029 -0.231*** -0.334*** -0.278***
(0.025) (0.029) (0.035) (0.037)
offense5:age_at_admit2 -0.034 0.090 0.284 0.412**
(0.108) (0.117) (0.151) (0.151)
offense2:age_at_admit3 -0.035 -0.052* -0.055 0.014
(0.020) (0.022) (0.029) (0.030)
offense3:age_at_admit3 -0.039* -0.064** 0.035 0.050
(0.019) (0.021) (0.028) (0.030)
offense4:age_at_admit3 0.002 -0.232*** -0.340*** -0.288***
(0.026) (0.030) (0.037) (0.039)
offense5:age_at_admit3 -0.038 0.012 0.225 0.304
(0.112) (0.121) (0.165) (0.165)
offense2:age_at_admit4 -0.290*** -0.329*** -0.297*** -0.254***
(0.031) (0.034) (0.048) (0.050)
offense3:age_at_admit4 -0.248*** -0.332*** -0.206*** -0.221***
(0.029) (0.032) (0.046) (0.048)
offense4:age_at_admit4 -0.206*** -0.433*** -0.561*** -0.557***
(0.037) (0.042) (0.054) (0.058)
offense5:age_at_admit4 -0.355* -0.536** 0.129 0.190
(0.166) (0.178) (0.285) (0.287)
offense2:age_at_admit5 -0.612*** -0.663*** -0.430*** -0.422***
(0.061) (0.067) (0.097) (0.100)
offense3:age_at_admit5 -0.424*** -0.559*** -0.146 -0.174
(0.056) (0.061) (0.089) (0.094)
offense4:age_at_admit5 -0.509*** -0.732*** -0.669*** -0.725***
(0.062) (0.071) (0.091) (0.098)
offense5:age_at_admit5 -0.252 -0.312 0.211 0.243
(0.313) (0.330) (0.451) (0.447)
race2 0.100*** 0.016 -0.063***
(0.007) (0.012) (0.012)
race3 -0.084*** 0.159*** 0.006
(0.008) (0.016) (0.018)
race4 -0.348*** -0.415*** -0.396***
(0.024) (0.042) (0.043)
education_lvl2 0.156*** 0.100***
(0.013) (0.014)
education_lvl3 0.065** 0.066**
(0.023) (0.024)
race2:education_lvl2 0.001 0.022
(0.018) (0.019)
race3:education_lvl2 -0.042 -0.050
(0.027) (0.030)
race4:education_lvl2 -0.331*** -0.356***
(0.064) (0.065)
race2:education_lvl3 -0.138*** -0.110***
(0.032) (0.033)
race3:education_lvl3 -0.083 -0.077
(0.068) (0.073)
race4:education_lvl3 0.097 0.045
(0.124) (0.125)
sex2 -0.431***
(0.013)
admit_type2 0.714***
(0.010)
admit_type3 0.942***
(0.030)
AIC 1475875.169 1447703.100 1143010.314 691321.405 608989.655
BIC 1475986.186 1448035.622 1143370.121 691744.319 609439.075
Log Likelihood -737927.584 -723821.550 -571472.157 -345619.703 -304450.828
Deviance 1475855.169 1447643.100 1142944.314 691239.405 608901.655
Num. obs. 489790 481219 401618 223009 201610
*** p < 0.001, ** p < 0.01, * p < 0.05

The first model simply verifies that the more serious/violent the crime an inmate commits, the longer the maximum sentence he/she gets. Among the offense types, a violation of public order leads to the shortest maximum sentence. All coefficients are statistically significant at the 0.001 level. The second model adds an interaction effect between the type of offense and age at admission. Inmates in the other age groups have a higher chance of getting a less severe sentence when they committed a less serious crime such as a property crime, except for the youngest age group (18-24 years); however, this difference is not statistically significant. In particular, those in age groups 4 (45-54 years) and 5 (older than 54 years) who committed property, public-order, or other crimes are less likely to get the most severe sentence than their counterparts who committed a violent crime (at the 0.001 level).
Model 3 adds the race variable; only Black inmates face a higher chance of getting a more severe sentence than White inmates, again at the 0.001 level. Model 4 adds another interaction effect, between race and education level. A higher level of education by itself increases the chance of getting a more severe sentence at a statistically significant level (although not linearly). Compared with White inmates with the same level of education, inmates of other races with a high school diploma (or GED) and Black inmates with a college-level education are statistically less likely to get a more severe sentence at the 0.001 level. Model 5 (the best-fitting model, with the lowest AIC and BIC values) adds gender and type of admission. Female inmates are statistically less likely to get a more severe sentence at the 0.001 level, and those who are admitted because of a parole return/revocation or for other reasons, including escape, are statistically more likely to get a more severe sentence at the 0.001 level.

Gender inequality

# Compare two covariate profiles that differ only in sex:
# baseline (x) uses sex code "1" (male), contrast (x1) uses code "2" (female).
z5$setx(sex = "1")
z5$setx1(sex = "2")

# Simulate quantities of interest for both profiles and plot them with
# narrow, equal margins on all four sides.
z5$sim()
par(mar = rep(1.8, 4))
z5$graph()

When the other features are held at constant values (i.e., their mode or median), the patterns of both the predicted and the expected values for males and females are similar, but female inmates are slightly more likely to get more lenient sentences, while male inmates have a slightly higher chance of getting more severe sentences.

# Extract the simulated "fd" quantity (Zelig's first differences) for the x1
# scenario and summarise its distribution for each sentence category.
ftd <- z5$get_qi(qi = "fd", xvalue = "x1")
summary(ftd)
##        1                 2                 3            
##  Min.   :0.05145   Min.   :0.02402   Min.   :1.753e-06  
##  1st Qu.:0.05585   1st Qu.:0.02593   1st Qu.:6.756e-03  
##  Median :0.05732   Median :0.02653   Median :8.118e-03  
##  Mean   :0.05732   Mean   :0.02654   Mean   :8.090e-03  
##  3rd Qu.:0.05876   3rd Qu.:0.02713   3rd Qu.:9.475e-03  
##  Max.   :0.06395   Max.   :0.02977   Max.   :1.472e-02  
##        4                  5                  6            
##  Min.   :-0.05805   Min.   :-0.03773   Min.   :-0.005177  
##  1st Qu.:-0.05347   1st Qu.:-0.03413   1st Qu.:-0.004306  
##  Median :-0.05239   Median :-0.03312   Median :-0.004080  
##  Mean   :-0.05235   Mean   :-0.03308   Mean   :-0.004075  
##  3rd Qu.:-0.05122   3rd Qu.:-0.03202   3rd Qu.:-0.003832  
##  Max.   :-0.04753   Max.   :-0.02765   Max.   :-0.003036  
##        7            
##  Min.   :-0.003437  
##  1st Qu.:-0.002620  
##  Median :-0.002421  
##  Mean   :-0.002431  
##  3rd Qu.:-0.002225  
##  Max.   :-0.001643

Gender effect in different types of crime (Violent, Property, and Drugs):

# Refit the full model (same specification as Model 5) so the simulations
# below start from a freshly estimated object.
z5 <- zelig(
  max_sentence ~ offense * age_at_admit + race * education_lvl + sex +
    admit_type,
  model = "ologit", data = prison2000, cite = FALSE
)

# For each offense type, contrast sex code "1" (male, x) with sex code "2"
# (female, x1) while holding the offense fixed, then simulate and plot.

# Offense code "1": violent
z5$setx(sex = "1", offense = "1")
z5$setx1(sex = "2", offense = "1")
z5$sim()
par(mar = rep(1.8, 4))
z5$graph()

# Offense code "2": property
z5$setx(sex = "1", offense = "2")
z5$setx1(sex = "2", offense = "2")
z5$sim()
par(mar = rep(1.8, 4))
z5$graph()

# Offense code "3": drugs
z5$setx(sex = "1", offense = "3")
z5$setx1(sex = "2", offense = "3")
z5$sim()
par(mar = rep(1.8, 4))
z5$graph()

The pattern of gender effects does not seem to vary much across the different types of crime. In all three categories of crime, female inmates are slightly more likely to get more lenient sentences, while male inmates have a slightly higher chance of getting more severe sentences.

Conclusion

Overall, all of the independent variables of interest, including some significant interaction effects between pairs of them, have an impact on whether an inmate is sentenced to a more lenient or a more severe punishment (maximum sentence length). In particular, the results show that gender inequality exists to some degree: female inmates are likely to get more lenient sentences.

Reference

United States Department of Justice. Office of Justice Programs. Bureau of Justice Statistics. National Corrections Reporting Program, 1991-2015; Selected Variables. Ann Arbor, MI: Inter-university Consortium for Political and Social Research [distributor], 2018-03-02. (https://doi.org/10.3886/ICPSR36862.v1)