This follows from a story about the The Creepy Online Archive of Texas Death Row Inmates' Final Words and discovering that some demographic data about the last 500 death row inmates is stored in an HTML table.
UPDATED on July 3, 2013 to add quartile map.
# Based on a tip from here:
# http://www.r-bloggers.com/scraping-table-from-html-web-with-cloudstat/
library(XML)
deathTX <- "http://www.tdcj.state.tx.us/death_row/dr_executed_offenders.html"
deathTX.table <- readHTMLTable(deathTX, header = T, which = 1, stringsAsFactors = TRUE)
Since deathTX.table contains all factors, convert Age to numeric:
# Convert age as factor to age as numeric:
deathTX.table$Age <- as.numeric(levels(deathTX.table$Age))[deathTX.table$Age]
Basic summary of Age at time of death
summary(deathTX.table$Age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 24 32 38 39 44 66
Percentage of Race on Death Row, as classified by Texas Dept. of Criminal Justice
(table(deathTX.table$Race)/length(deathTX.table$Race)) * 100
##
## Black Hispanic Other White
## 37.4 17.2 0.4 45.0
Prepare data for time analysis:
library(lubridate)
# Convert date as factor to date as character
deathTX.table$Date <- as.character(deathTX.table$Date)
# Then convert to MDY date format (requires vector to be in character or
# numeric first); Time is Central Standard, per Texas
deathTX.table$Date <- mdy(deathTX.table$Date, tz = "America/Chicago")
# Create a new variable with Year only
deathTX.table$Year <- year(deathTX.table$Date)
# Create a new variable with Year as a factor
deathTX.table$YearF <- as.factor(deathTX.table$Year)
Make some plots:
library(ggplot2)
deathRace1 <- ggplot(deathTX.table, aes(x = YearF)) + geom_bar(stat = "bin") +
labs(x = "Year", y = "Count", title = "Deaths per Year")
Bar plot:
deathRace1
Bar plot, faceted by Race:
deathRace1 + facet_grid(Race ~ .)
Stacked plot:
deathRace2 <- ggplot(deathTX.table, aes(x = YearF, fill = factor(Race))) + geom_bar(stat = "bin",
position = "fill") + labs(x = "Year", y = "Proportion", title = "Deaths per Year by Race")
deathRace2
table(deathTX.table$YearF, deathTX.table$Race)
##
## Black Hispanic Other White
## 1982 1 0 0 0
## 1984 0 0 0 3
## 1985 1 2 0 3
## 1986 2 2 0 6
## 1987 2 2 0 2
## 1988 2 0 0 1
## 1989 1 1 0 2
## 1990 2 0 0 2
## 1991 2 1 0 2
## 1992 5 2 0 5
## 1993 7 4 0 6
## 1994 4 1 0 9
## 1995 8 2 1 8
## 1996 1 1 0 1
## 1997 13 2 1 21
## 1998 2 5 0 13
## 1999 11 7 0 17
## 2000 16 5 0 19
## 2001 6 1 0 10
## 2002 11 5 0 17
## 2003 7 3 0 14
## 2004 12 3 0 8
## 2005 5 3 0 11
## 2006 14 5 0 5
## 2007 8 6 0 12
## 2008 9 3 0 6
## 2009 13 7 0 4
## 2010 5 5 0 7
## 2011 3 4 0 6
## 2012 7 4 0 4
## 2013 7 0 0 1
Draw basic maps highlighting Counties where offenses occurred.
The following two maps show range of occurrences by county. The first map splits it by median values. The second map splits it by quartile values.
First, prepare data:
# Based on some tips from here:
# http://statistical-research.com/earthquakes-over-the-past-7-days/
library(maps)
x <- data.frame(table(deathTX.table$County))
colnames(x) <- c("County", "Freq")
# Convert County names to match 'county,state' format in 'maps' package
x$County <- tolower(x$County)
x$County <- gsub("^", "texas,", x$County)
# Create new vector based on calculation of median values of times
# counties are represented
x$med.col[x$Freq <= median(x$Freq)] <- "Blue"
x$med.col[x$Freq > median(x$Freq)] <- "Red"
Draw map based on medians:
# Draw county map of Texas
map("county", "texas", fill = FALSE)
# Fill in with Blue counties with less or equal to median values
map("county", region = x$County[x$med.col == "Blue"], fill = TRUE, col = "Blue",
add = TRUE)
# Fill in with Red counties with greater than median values
map("county", region = x$County[x$med.col == "Red"], fill = TRUE, col = "Red",
add = TRUE)
title("Texas County Death Row Map")
legend("bottomleft", c("<= Median", "> Median"), pch = 16, col = c("Blue",
"Red"))
Create a vector based on quartile values and draw map based on quartile range:
x$quant.col[x$Freq <= quantile(x$Freq)[2]] <- "Blue"
x$quant.col[x$Freq <= quantile(x$Freq)[3] & x$Freq > quantile(x$Freq)[2]] <- "Green"
x$quant.col[x$Freq <= quantile(x$Freq)[4] & x$Freq > quantile(x$Freq)[3]] <- "Purple"
x$quant.col[x$Freq <= quantile(x$Freq)[5] & x$Freq > quantile(x$Freq)[4]] <- "Orange"
Draw quartile map:
# Draw county map of Texas
map("county", "texas", fill = FALSE)
map("county", region = x$County[x$quant.col == "Blue"], fill = TRUE, col = "Blue",
add = TRUE)
map("county", region = x$County[x$quant.col == "Green"], fill = TRUE, col = "Green",
add = TRUE)
map("county", region = x$County[x$quant.col == "Purple"], fill = TRUE, col = "Purple",
add = TRUE)
map("county", region = x$County[x$quant.col == "Orange"], fill = TRUE, col = "Orange",
add = TRUE)
# Add title and legend
title("Texas County Death Row Map")
legend("bottomleft", c("25%", "50%", "75%", "100%"), pch = 16, col = c("Blue",
"Green", "Purple", "Orange"), ncol = 2, title = "Quartiles")
Note that Texas has a total of 254 counties.
Display summary and total data for above map:
summary(x)
## County Freq med.col quant.col
## Length:88 Min. : 1.00 Length:88 Length:88
## Class :character 1st Qu.: 1.00 Class :character Class :character
## Mode :character Median : 2.00 Mode :character Mode :character
## Mean : 5.68
## 3rd Qu.: 4.25
## Max. :118.00
x
## County Freq med.col quant.col
## 1 texas,anderson 4 Red Purple
## 2 texas,aransas 1 Blue Blue
## 3 texas,atascosa 1 Blue Blue
## 4 texas,bailey 1 Blue Blue
## 5 texas,bastrop 1 Blue Blue
## 6 texas,bee 1 Blue Blue
## 7 texas,bell 3 Red Purple
## 8 texas,bexar 37 Red Orange
## 9 texas,bowie 5 Red Orange
## 10 texas,brazoria 4 Red Purple
## 11 texas,brazos 12 Red Orange
## 12 texas,brown 1 Blue Blue
## 13 texas,caldwell 1 Blue Blue
## 14 texas,cameron 6 Red Orange
## 15 texas,chambers 1 Blue Blue
## 16 texas,cherokee 3 Red Purple
## 17 texas,clay 1 Blue Blue
## 18 texas,collin 6 Red Orange
## 19 texas,comal 2 Blue Green
## 20 texas,coryell 1 Blue Blue
## 21 texas,crockett 1 Blue Blue
## 22 texas,dallas 51 Red Orange
## 23 texas,dawson 1 Blue Blue
## 24 texas,denton 6 Red Orange
## 25 texas,ellis 1 Blue Blue
## 26 texas,el paso 3 Red Purple
## 27 texas,fort bend 5 Red Orange
## 28 texas,freestone 1 Blue Blue
## 29 texas,galveston 6 Red Orange
## 30 texas,gillespie 1 Blue Blue
## 31 texas,grayson 2 Blue Green
## 32 texas,gregg 5 Red Orange
## 33 texas,hale 2 Blue Green
## 34 texas,hamilton 1 Blue Blue
## 35 texas,hardin 1 Blue Blue
## 36 texas,harris 118 Red Orange
## 37 texas,harrison 1 Blue Blue
## 38 texas,henderson 2 Blue Green
## 39 texas,hidalgo 2 Blue Green
## 40 texas,hopkins 1 Blue Blue
## 41 texas,hunt 3 Red Purple
## 42 texas,jefferson 15 Red Orange
## 43 texas,johnson 2 Blue Green
## 44 texas,jones 1 Blue Blue
## 45 texas,kaufman 1 Blue Blue
## 46 texas,kendall 1 Blue Blue
## 47 texas,kerr 2 Blue Green
## 48 texas,kleberg 1 Blue Blue
## 49 texas,lamar 2 Blue Green
## 50 texas,lee 1 Blue Blue
## 51 texas,leon 1 Blue Blue
## 52 texas,liberty 3 Red Purple
## 53 texas,llano 1 Blue Blue
## 54 texas,lubbock 10 Red Orange
## 55 texas,matagorda 2 Blue Green
## 56 texas,mclennan 7 Red Orange
## 57 texas,milam 1 Blue Blue
## 58 texas,montgomery 15 Red Orange
## 59 texas,morris 1 Blue Blue
## 60 texas,nacogdoches 1 Blue Blue
## 61 texas,navarro 6 Red Orange
## 62 texas,newton 1 Blue Blue
## 63 texas,nueces 14 Red Orange
## 64 texas,parker 2 Blue Green
## 65 texas,pecos 2 Blue Green
## 66 texas,polk 2 Blue Green
## 67 texas,potter 10 Red Orange
## 68 texas,randall 3 Red Purple
## 69 texas,red river 2 Blue Green
## 70 texas,refugio 2 Blue Green
## 71 texas,sabine 1 Blue Blue
## 72 texas,san jacinto 1 Blue Blue
## 73 texas,san patricio 1 Blue Blue
## 74 texas,scurry 1 Blue Blue
## 75 texas,shelby 1 Blue Blue
## 76 texas,smith 10 Red Orange
## 77 texas,tarrant 37 Red Orange
## 78 texas,taylor 5 Red Orange
## 79 texas,tom green 3 Red Purple
## 80 texas,travis 8 Red Orange
## 81 texas,trinity 1 Blue Blue
## 82 texas,upshur 1 Blue Blue
## 83 texas,victoria 1 Blue Blue
## 84 texas,walker 3 Red Purple
## 85 texas,wichita 2 Blue Green
## 86 texas,wilbarger 2 Blue Green
## 87 texas,williamson 3 Red Purple
## 88 texas,wood 1 Blue Blue