Texas Death Row

This follows from a story about the The Creepy Online Archive of Texas Death Row Inmates' Final Words and discovering that some demographic data about the last 500 death row inmates is stored in an HTML table.

UPDATED on July 3, 2013 to add quartile map.

# Based on a tip from here:
# http://www.r-bloggers.com/scraping-table-from-html-web-with-cloudstat/
library(XML)
deathTX <- "http://www.tdcj.state.tx.us/death_row/dr_executed_offenders.html"
deathTX.table <- readHTMLTable(deathTX, header = T, which = 1, stringsAsFactors = TRUE)

Since deathTX.table contains all factors, convert Age to numeric:

# Convert age as factor to age as numeric:
deathTX.table$Age <- as.numeric(levels(deathTX.table$Age))[deathTX.table$Age]

Basic summary of Age at time of death

summary(deathTX.table$Age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      24      32      38      39      44      66

Percentage of Race on Death Row, as classified by Texas Dept. of Criminal Justice

(table(deathTX.table$Race)/length(deathTX.table$Race)) * 100
## 
##    Black Hispanic    Other    White 
##     37.4     17.2      0.4     45.0

Prepare data for time analysis:

library(lubridate)

# Convert date as factor to date as character
deathTX.table$Date <- as.character(deathTX.table$Date)

# Then convert to MDY date format (requires vector to be in character or
# numeric first); Time is Central Standard, per Texas
deathTX.table$Date <- mdy(deathTX.table$Date, tz = "America/Chicago")

# Create a new variable with Year only
deathTX.table$Year <- year(deathTX.table$Date)

# Create a new variable with Year as a factor
deathTX.table$YearF <- as.factor(deathTX.table$Year)

Make some plots:

library(ggplot2)
deathRace1 <- ggplot(deathTX.table, aes(x = YearF)) + geom_bar(stat = "bin") + 
    labs(x = "Year", y = "Count", title = "Deaths per Year")

Bar plot:

deathRace1

plot of chunk unnamed-chunk-7

Bar plot, faceted by Race:

deathRace1 + facet_grid(Race ~ .)

plot of chunk unnamed-chunk-8

Stacked plot:

deathRace2 <- ggplot(deathTX.table, aes(x = YearF, fill = factor(Race))) + geom_bar(stat = "bin", 
    position = "fill") + labs(x = "Year", y = "Proportion", title = "Deaths per Year by Race")
deathRace2

plot of chunk unnamed-chunk-10

Data for above faceted plot:

table(deathTX.table$YearF, deathTX.table$Race)
##       
##        Black Hispanic Other White
##   1982     1        0     0     0
##   1984     0        0     0     3
##   1985     1        2     0     3
##   1986     2        2     0     6
##   1987     2        2     0     2
##   1988     2        0     0     1
##   1989     1        1     0     2
##   1990     2        0     0     2
##   1991     2        1     0     2
##   1992     5        2     0     5
##   1993     7        4     0     6
##   1994     4        1     0     9
##   1995     8        2     1     8
##   1996     1        1     0     1
##   1997    13        2     1    21
##   1998     2        5     0    13
##   1999    11        7     0    17
##   2000    16        5     0    19
##   2001     6        1     0    10
##   2002    11        5     0    17
##   2003     7        3     0    14
##   2004    12        3     0     8
##   2005     5        3     0    11
##   2006    14        5     0     5
##   2007     8        6     0    12
##   2008     9        3     0     6
##   2009    13        7     0     4
##   2010     5        5     0     7
##   2011     3        4     0     6
##   2012     7        4     0     4
##   2013     7        0     0     1

Draw basic maps highlighting Counties where offenses occurred.

The following two maps show range of occurrences by county. The first map splits it by median values. The second map splits it by quartile values.

First, prepare data:

# Based on some tips from here:
# http://statistical-research.com/earthquakes-over-the-past-7-days/
library(maps)

x <- data.frame(table(deathTX.table$County))
colnames(x) <- c("County", "Freq")

# Convert County names to match 'county,state' format in 'maps' package
x$County <- tolower(x$County)
x$County <- gsub("^", "texas,", x$County)

# Create new vector based on calculation of median values of times
# counties are represented
x$med.col[x$Freq <= median(x$Freq)] <- "Blue"
x$med.col[x$Freq > median(x$Freq)] <- "Red"

Draw map based on medians:

# Draw county map of Texas
map("county", "texas", fill = FALSE)

# Fill in with Blue counties with less or equal to median values
map("county", region = x$County[x$med.col == "Blue"], fill = TRUE, col = "Blue", 
    add = TRUE)

# Fill in with Red counties with greater than median values
map("county", region = x$County[x$med.col == "Red"], fill = TRUE, col = "Red", 
    add = TRUE)

title("Texas County Death Row Map")
legend("bottomleft", c("<= Median", ">   Median"), pch = 16, col = c("Blue", 
    "Red"))

plot of chunk unnamed-chunk-13

Create a vector based on quartile values and draw map based on quartile range:

x$quant.col[x$Freq <= quantile(x$Freq)[2]] <- "Blue"

x$quant.col[x$Freq <= quantile(x$Freq)[3] & x$Freq > quantile(x$Freq)[2]] <- "Green"

x$quant.col[x$Freq <= quantile(x$Freq)[4] & x$Freq > quantile(x$Freq)[3]] <- "Purple"

x$quant.col[x$Freq <= quantile(x$Freq)[5] & x$Freq > quantile(x$Freq)[4]] <- "Orange"

Draw quartile map:

# Draw county map of Texas
map("county", "texas", fill = FALSE)

map("county", region = x$County[x$quant.col == "Blue"], fill = TRUE, col = "Blue", 
    add = TRUE)

map("county", region = x$County[x$quant.col == "Green"], fill = TRUE, col = "Green", 
    add = TRUE)

map("county", region = x$County[x$quant.col == "Purple"], fill = TRUE, col = "Purple", 
    add = TRUE)

map("county", region = x$County[x$quant.col == "Orange"], fill = TRUE, col = "Orange", 
    add = TRUE)

# Add title and legend
title("Texas County Death Row Map")

legend("bottomleft", c("25%", "50%", "75%", "100%"), pch = 16, col = c("Blue", 
    "Green", "Purple", "Orange"), ncol = 2, title = "Quartiles")

plot of chunk unnamed-chunk-15

Note that Texas has a total of 254 counties.

Display summary and total data for above map:

summary(x)
##     County               Freq          med.col           quant.col        
##  Length:88          Min.   :  1.00   Length:88          Length:88         
##  Class :character   1st Qu.:  1.00   Class :character   Class :character  
##  Mode  :character   Median :  2.00   Mode  :character   Mode  :character  
##                     Mean   :  5.68                                        
##                     3rd Qu.:  4.25                                        
##                     Max.   :118.00
x
##                County Freq med.col quant.col
## 1      texas,anderson    4     Red    Purple
## 2       texas,aransas    1    Blue      Blue
## 3      texas,atascosa    1    Blue      Blue
## 4        texas,bailey    1    Blue      Blue
## 5       texas,bastrop    1    Blue      Blue
## 6           texas,bee    1    Blue      Blue
## 7          texas,bell    3     Red    Purple
## 8         texas,bexar   37     Red    Orange
## 9         texas,bowie    5     Red    Orange
## 10     texas,brazoria    4     Red    Purple
## 11       texas,brazos   12     Red    Orange
## 12        texas,brown    1    Blue      Blue
## 13     texas,caldwell    1    Blue      Blue
## 14      texas,cameron    6     Red    Orange
## 15     texas,chambers    1    Blue      Blue
## 16     texas,cherokee    3     Red    Purple
## 17         texas,clay    1    Blue      Blue
## 18       texas,collin    6     Red    Orange
## 19        texas,comal    2    Blue     Green
## 20      texas,coryell    1    Blue      Blue
## 21     texas,crockett    1    Blue      Blue
## 22       texas,dallas   51     Red    Orange
## 23       texas,dawson    1    Blue      Blue
## 24       texas,denton    6     Red    Orange
## 25        texas,ellis    1    Blue      Blue
## 26      texas,el paso    3     Red    Purple
## 27    texas,fort bend    5     Red    Orange
## 28    texas,freestone    1    Blue      Blue
## 29    texas,galveston    6     Red    Orange
## 30    texas,gillespie    1    Blue      Blue
## 31      texas,grayson    2    Blue     Green
## 32        texas,gregg    5     Red    Orange
## 33         texas,hale    2    Blue     Green
## 34     texas,hamilton    1    Blue      Blue
## 35       texas,hardin    1    Blue      Blue
## 36       texas,harris  118     Red    Orange
## 37     texas,harrison    1    Blue      Blue
## 38    texas,henderson    2    Blue     Green
## 39      texas,hidalgo    2    Blue     Green
## 40      texas,hopkins    1    Blue      Blue
## 41         texas,hunt    3     Red    Purple
## 42    texas,jefferson   15     Red    Orange
## 43      texas,johnson    2    Blue     Green
## 44        texas,jones    1    Blue      Blue
## 45      texas,kaufman    1    Blue      Blue
## 46      texas,kendall    1    Blue      Blue
## 47         texas,kerr    2    Blue     Green
## 48      texas,kleberg    1    Blue      Blue
## 49        texas,lamar    2    Blue     Green
## 50          texas,lee    1    Blue      Blue
## 51         texas,leon    1    Blue      Blue
## 52      texas,liberty    3     Red    Purple
## 53        texas,llano    1    Blue      Blue
## 54      texas,lubbock   10     Red    Orange
## 55    texas,matagorda    2    Blue     Green
## 56     texas,mclennan    7     Red    Orange
## 57        texas,milam    1    Blue      Blue
## 58   texas,montgomery   15     Red    Orange
## 59       texas,morris    1    Blue      Blue
## 60  texas,nacogdoches    1    Blue      Blue
## 61      texas,navarro    6     Red    Orange
## 62       texas,newton    1    Blue      Blue
## 63       texas,nueces   14     Red    Orange
## 64       texas,parker    2    Blue     Green
## 65        texas,pecos    2    Blue     Green
## 66         texas,polk    2    Blue     Green
## 67       texas,potter   10     Red    Orange
## 68      texas,randall    3     Red    Purple
## 69    texas,red river    2    Blue     Green
## 70      texas,refugio    2    Blue     Green
## 71       texas,sabine    1    Blue      Blue
## 72  texas,san jacinto    1    Blue      Blue
## 73 texas,san patricio    1    Blue      Blue
## 74       texas,scurry    1    Blue      Blue
## 75       texas,shelby    1    Blue      Blue
## 76        texas,smith   10     Red    Orange
## 77      texas,tarrant   37     Red    Orange
## 78       texas,taylor    5     Red    Orange
## 79    texas,tom green    3     Red    Purple
## 80       texas,travis    8     Red    Orange
## 81      texas,trinity    1    Blue      Blue
## 82       texas,upshur    1    Blue      Blue
## 83     texas,victoria    1    Blue      Blue
## 84       texas,walker    3     Red    Purple
## 85      texas,wichita    2    Blue     Green
## 86    texas,wilbarger    2    Blue     Green
## 87   texas,williamson    3     Red    Purple
## 88         texas,wood    1    Blue      Blue