DEM 7263 Segregation Measures

Doing this in R

First we need to load some libraries. We will use R to get all of our census data for us, either from the 2010 100% Summary File 1 (via the UScensus2010 library suite), or the acs package.

library(spdep)
library(seg)
library(UScensus2010)
library(UScensus2010tract)

## 
## UScensus2010tract: US Census 2010 Tract Level Shapefiles and Additional Demographic
## Data 
## Version 1.00 created on 2011-11-06 
## copyright (c) 2011, Zack W. Almquist, University of California-Irvine
## Type help(package="UScensus2010tract") to get started.
## 
## For citation information, type citation("UScensus2010tract").

library(UScensus2010county)

## 
## UScensus2010county: US Census 2010 County Level Shapefiles and Additional
## Demographic Data 
## Version 1.00 created on 2011-11-06 
## copyright (c) 2011, Zack W. Almquist, University of California-Irvine
## Type help(package="UScensus2010county") to get started.
## 
## For citation information, type citation("UScensus2010county").

library(acs)
library(sp)
library(RColorBrewer)

Before we can use the SF 1 data, you must install the various geographies you want to use to make your segregation indices. I’ll be using tracts, summed up to counties. So, i’ll do: install.tract("osx") and install.county("osx") since i’m on a Mac. I’ve had bad luck using install.*("windows") on my Windows box, and typically use install.*("linux") on either linux or windows.

The first example mimics that from Sparks (2014), Where I show how to calculate three common measures of segregation: Dissimiliarity, Isolation/Interaction and Theile’s Entropy index for multiple groups.

The general process is 1. Create a lower-level unit data set, that has your populations subdivided by race, SES, etc. Here i’m using census tracts. + 1a do any calculations, such as percentages on this data set. 2. Sum the populations up to the higher level unit, such as the county below + 2a Again, do any calculations you need to do 3. Use the formula to put the various parts together. 4. Use tapply() to sum your index across lower level units within higher level units 5. congratulate youself.

#create a set of three race variables, total, white and black for each census tract in the state of Texas from the 2010 SF1 data
mydem<-demographics(dem=c("P0030001", "P0030002", "P0030003","P0030004","P0030005","P0030006","P0030007","P0030008"), state="48", statefips=TRUE, level="tract")

#assemble a data frame with sensical names and create a county FIPS code for each tract
#First, I assemble the "other" race group, which in the SF1 is composed of 5 tables P0030004 to P0030008
other<-data.frame(cofips=substr(rownames(mydem), 1,5),fips=rownames(mydem),oth1=unlist(mydem[1,"P0030004"]),
                  oth2=unlist(mydem[1,"P0030005"]),
                  oth3=unlist(mydem[1,"P0030006"]),
                  oth4=unlist(mydem[1,"P0030007"]),
                  oth5=unlist(mydem[1,"P0030008"]))

#build the tract population data
trdat<-data.frame(cofips=substr(rownames(mydem), 1,5),fips=rownames(mydem), total=unlist(mydem[1,"P0030001"]), white=unlist(mydem[1,"P0030002"]), black=unlist(mydem[1,"P0030003"]),other=apply(other[,3:7],1,sum))

#sort the data by county and tract
trdat<-trdat[order(trdat$cofips, trdat$fips),]
#look at the first few cases
head(trdat)

##      cofips        fips total white black other
## 4686  48001 48001950100  4685  4012   452   221
## 4676  48001 48001950401  5422  1825  2266  1331
## 4677  48001 48001950402  7535  2591  3248  1696
## 4683  48001 48001950500  4377  2737   800   840
## 4682  48001 48001950600  6405  3831  1674   900
## 4681  48001 48001950700  2640  1051  1164   425

#We need the county-level totals for the total population and each race group
co_total<-tapply(trdat$total, trdat$cofips, sum)
co_total<-data.frame(cofips=names(unlist(co_total)), pop=unlist(co_total))
co_wht<-tapply(trdat$white, trdat$cofips, sum)
co_wht<-data.frame(cofips=names(unlist(co_wht)), pop=unlist(co_wht))
co_blk<-tapply(trdat$black, trdat$cofips, sum)
co_blk<-data.frame(cofips=names(unlist(co_blk)), pop=unlist(co_blk))
co_oth<-tapply(trdat$other, trdat$cofips, sum)
co_oth<-data.frame(cofips=names(unlist(co_oth)), pop=unlist(co_oth))

#For the multi-group measure of segregation, we also need population proportions
c_pwhite<-co_wht$pop/co_total$pop
c_pblack<-co_blk$pop/co_total$pop
c_pother<-co_oth$pop/co_total$pop

#we assemble them into a county-level data frame with easy names
county_dat<-data.frame(cofips=co_total$cofips, co_total=co_total$pop, co_wht_total=co_wht$pop, co_blk_total=co_blk$pop, co_oth_total=co_oth$pop, c_pwhite=c_pwhite, c_pblack=c_pblack, c_pother=c_pother)

#Now I make the county-level Entropy measure to be used later, it's easier to do it before
#merging back to the tracts.
county_dat$c_ent<-county_dat$c_pwhite*(log(1/county_dat$c_pwhite))+county_dat$c_pblack*(log(1/county_dat$c_pblack))+county_dat$c_pother*(log(1/county_dat$c_pother))
county_dat$c_ent<-ifelse(is.na(county_dat$c_ent)==T, 0,county_dat$c_ent)


#we merge the county data back to the tract data by the county FIPS code
merged<-merge(x=trdat,y=county_dat, by="cofips", all.x=T )
#have a look and make sure it looks ok
head(merged)

##   cofips        fips total white black other co_total co_wht_total
## 1  48001 48001950100  4685  4012   452   221    58458        38632
## 2  48001 48001950401  5422  1825  2266  1331    58458        38632
## 3  48001 48001950402  7535  2591  3248  1696    58458        38632
## 4  48001 48001950500  4377  2737   800   840    58458        38632
## 5  48001 48001950600  6405  3831  1674   900    58458        38632
## 6  48001 48001950700  2640  1051  1164   425    58458        38632
##   co_blk_total co_oth_total  c_pwhite  c_pblack  c_pother     c_ent
## 1        12310         7516 0.6608505 0.2105785 0.1285709 0.8655364
## 2        12310         7516 0.6608505 0.2105785 0.1285709 0.8655364
## 3        12310         7516 0.6608505 0.2105785 0.1285709 0.8655364
## 4        12310         7516 0.6608505 0.2105785 0.1285709 0.8655364
## 5        12310         7516 0.6608505 0.2105785 0.1285709 0.8655364
## 6        12310         7516 0.6608505 0.2105785 0.1285709 0.8655364

That is the mechanics for setting up the data. Now we will calculate the indices:

#2 group segregation measures
#Now we begin the segregation calculations
#Calculate the tract-specific contribution to the county dissimilarity index
merged$d.wb<-.5*(abs(merged$white/merged$co_wht_total - merged$black/merged$co_blk_total))
#The county-level dissimilarity index is just the sum of the tract values within a county, this is easily done with the tapply() function, which in this case will sum the tract-specific contributions to the index within each county.
dissim.wb.tr<-tapply(merged$d.wb, merged$cofips, sum, na.rm=T)
hist(dissim.wb.tr)

#this in interaction index  for blacks and white
#first population is minority population, second is non-minority population
merged$int.wb<-(merged$black/merged$co_blk_total * merged$white/merged$total)
#Again, we sum these within each county
int.wb.tr<-tapply(merged$int.wb, merged$cofips, sum, na.rm=T)
head(int.wb.tr)

##     48001     48003     48005     48007     48009     48011 
## 0.5198444 0.7840112 0.5427863 0.8582850 0.9498711 0.9331931

hist(int.wb.tr)

#this is the isolation index for blacks
merged$iso.b<-(merged$black/merged$co_blk_total * merged$black/merged$total)
#Again, we sum these within each county
isol.b.tr<-tapply(merged$iso.b, merged$cofips, sum, na.rm=T)
head(isol.b.tr)

##       48001       48003       48005       48007       48009       48011 
## 0.307966255 0.018252381 0.290047066 0.018043037 0.006946279 0.005786428

hist(isol.b.tr)

So, those are the typical dissimilarity, interaction and isolation indices for two groups. Next, we calculate Theile’s multi-group Entropy index using whites, blacks and all others, but of course we could break this into as many groups as you want, with only a few more lines of code.

#Multi-group segregation
#Here, I calculate the Theile Entropy index
#Just like for the counties, we also need population proportions at the tract level
merged$tr_pwhite<-merged$white/merged$total
merged$tr_pblk<-merged$black/merged$total
merged$tr_pother<-merged$other/merged$total

#This is the tract-level entropy measure
merged$tr_ent<-merged$tr_pwhite*(log(1/merged$tr_pwhite))+merged$tr_pblk*(log(1/merged$tr_pblk))+merged$tr_pother*(log(1/merged$tr_pother))
merged$tr_ent<-ifelse(is.na(merged$tr_ent)==T, 0,merged$tr_ent)

head(merged)

##   cofips        fips total white black other co_total co_wht_total
## 1  48001 48001950100  4685  4012   452   221    58458        38632
## 2  48001 48001950401  5422  1825  2266  1331    58458        38632
## 3  48001 48001950402  7535  2591  3248  1696    58458        38632
## 4  48001 48001950500  4377  2737   800   840    58458        38632
## 5  48001 48001950600  6405  3831  1674   900    58458        38632
## 6  48001 48001950700  2640  1051  1164   425    58458        38632
##   co_blk_total co_oth_total  c_pwhite  c_pblack  c_pother     c_ent
## 1        12310         7516 0.6608505 0.2105785 0.1285709 0.8655364
## 2        12310         7516 0.6608505 0.2105785 0.1285709 0.8655364
## 3        12310         7516 0.6608505 0.2105785 0.1285709 0.8655364
## 4        12310         7516 0.6608505 0.2105785 0.1285709 0.8655364
## 5        12310         7516 0.6608505 0.2105785 0.1285709 0.8655364
## 6        12310         7516 0.6608505 0.2105785 0.1285709 0.8655364
##          d.wb     int.wb       iso.b tr_pwhite    tr_pblk  tr_pother
## 1 0.033566807 0.03144356 0.003542495 0.8563501 0.09647812 0.04717182
## 2 0.068418678 0.06195912 0.076931154 0.3365917 0.41792696 0.24548137
## 3 0.098390888 0.09072816 0.113734109 0.3438620 0.43105508 0.22508295
## 4 0.002930093 0.04063780 0.011878056 0.6253141 0.18277359 0.19191227
## 5 0.018410254 0.08133742 0.035541334 0.5981265 0.26135831 0.14051522
## 6 0.033675922 0.03764382 0.041691160 0.3981061 0.44090909 0.16098485
##      tr_ent
## 1 0.5024684
## 2 1.0759163
## 3 1.0654821
## 4 0.9210035
## 5 0.9338686
## 6 1.0217681

#Now I calculate each tract's contribution to the H index
merged$Hcalc<-(merged$total*(merged$c_ent-merged$tr_ent))/(merged$co_total*merged$c_ent)
merged$Hcalc<-ifelse(is.na(merged$Hcalc)==T, 0, merged$Hcalc)
hist(merged$Hcalc)

#Then, we sum across tracts within counties to get the H index for each county
hindex<-unlist(tapply(merged$Hcalc, merged$cofips, sum))
head(hindex)

##      48001      48003      48005      48007      48009      48011 
## 0.11995495 0.01286551 0.13729605 0.01428389 0.03167802 0.00000000

hist(hindex)

That gives us four indices. Now we turn to more specialized indices: The spatial clustering index, that myself and some colleagues used in this paper and the general neighborhood sorting index derived by Paul Jargowsky.

Both of these are custom-written functions that work but I would minimize the amount of tinkering with them. I plan on putting these on github in a more proper form, but here they are in the raw. They both involve a series of checks that ensure that spatial weights are correct for counties with only 1 tract, etc.

#spatial proximity index
data(texas.tract10)
dat<-texas.tract10[, c(1,2,3,4,12:19)]

#this is my own function to calculate the spatial proximity index
spatseg<- function(dat) {
  nameco<-unique(dat$county)
  
  #####need to change fips and county for particular data set#####
  nwncounty<-tapply(as.character(dat$fips), as.character(dat$county), table)
  nwnco<-sapply(nwncounty, sum, USE.NAMES=T,simplify=T)
  ncos<-ifelse(nwnco>1,1,0)
  counties<-match(nameco, names(nwnco))
  t1<-names(ncos)
  t2<-sort(as.character(nameco))
  dat.sub<-data.frame(county=t2, gt1=ncos)
  subsamp<-subset(dat.sub, dat.sub$gt1>0)
  nco<-dim(subsamp) [1]
  dat.new<-dat[which(as.character(dat$county) %in% as.character(subsamp$county)),]
  nwncounty2<-tapply(dat.new$fips, dat.new$county, table)
  nwnco2<-sapply(nwncounty2, sum)
  nconew<- length(nwnco2[nwnco2>0])
  
  ####potentially need to change these to suite your population subgroups.
  Ptt<-numeric(nconew)
  Ptw <-numeric(nconew)
  Ptb <-numeric(nconew)
  Ptot<-numeric(nconew)
  Wtot<-numeric(nconew)
  Btot<-numeric(nconew)
  
  for (i in 1:nconew)
  {
    
    coi<-dat.new[as.character(dat.new$county)==as.character(subsamp$county[i]),]
#####You will need to customize these names, per your dataset #####
    popt<-coi$P0030001
    popw<-coi$P0030002
    popb<-coi$P0030003
    Ptot  <-aggregate(coi$P0030001, by=list(coi$county), sum, na.rm=T)$x
    Wtot  <-aggregate(coi$P0030002, by=list(coi$county), sum, na.rm=T)$x
    Btot   <-aggregate(coi$P0030003, by=list(coi$county), sum, na.rm=T)$x
    co.nb<-poly2nb(coi, queen=T)
    co.lw<-nb2listw(co.nb, zero.policy=T, style="B")
    co.sm<-nb2mat(co.nb, style="B", zero.policy=T)
     Ptt[i]<-(t(popt)%*%co.sm%*%popt)/Ptot^2
    Ptw[i]<-(t(popw)%*%co.sm%*%popw)/Wtot^2
    Ptb[i]<-(t(popb)%*%co.sm%*%popb)/Btot^2
   #  print (unique(as.character(coi$county)))
  }   
  return(list(Ptt=Ptt , Ptw=Ptw, Ptb=Ptb))
}
spatdat<-spatseg(dat)
Ptt<-spatdat$Ptt
Ptw<-spatdat$Ptw
Ptb<-spatdat$Ptb

####Change fips and county for particular data set####
nameco<-unique(dat$county)
nwncounty<-tapply(as.character(dat$fips), as.character(dat$county), table)
nwnco<-sapply(nwncounty, sum, USE.NAMES=T,simplify=T)
ncos<-ifelse(nwnco>1,1,0)

t1<-names(ncos)
t2<-sort(as.character(nameco))

dat.sub<-data.frame(county=t2, gt1=ncos)
goodcos<-dat.sub[dat.sub$gt1>0,]
dim(goodcos)[1]

## [1] 210

dat.new<-dat[which(as.character(dat$county) %in% as.character(goodcos$county)),]

popdat<-slot(dat.new, "data")

dat1<-subset(popdat, popdat$county %in%goodcos$county)

co.pop.t<-tapply(dat1$P0030001, as.character(dat1$county), sum)   
co.pop.w<- tapply(dat1$P0030002, as.character(dat1$county), sum)
co.pop.b<- tapply(dat1$P0030003, as.character(dat1$county), sum)    


P.tt<-Ptt
P.ww<-Ptw
P.bb<-Ptb

#repeat for white and black
Sp.wb<-((P.ww*co.pop.w)+(P.bb*co.pop.b))/(P.tt*co.pop.t)

Spprox<-data.frame(cofips=paste("48", names(Sp.wb), sep=""), spsegr=Sp.wb) 
head(Spprox)

##     cofips    spsegr
## 001  48001 0.8965521
## 003  48003 0.8163956
## 005  48005 0.9118763
## 007  48007 0.8893582
## 009  48009 0.9576126
## 013  48013 0.8602982

hist(Spprox$spsegr)

So, there’s the spatial clustering index. Finally, I do the GNSI index for incomes. This is an approximate index, since the original paper I think assumes you have individual household incomes with geographic locations, which I don’t have. Instead I relate tract median household income to the county’s median household income. To use the acs library, you need a census developer API key, you can get one here.

I don’t show you my actual key, but you can enter yours like: mykey<-"theykeycensussendsyou"

####Jargowski GNSI

#Get 2010 ACS median household incomes for tracts in Texas
txtracts<-geo.make(state="TX",county="*", tract="*")

income.tract<-acs.fetch(key=mykey,endyear = 2010, span = 5, geography = txtracts, table.number = "B19013")

## Warning: NAs introduced by coercion

tx.trincome<-data.frame(income=income.tract@estimate)
tx.trincome$ids<-paste(income.tract@geography$state, sprintf("%03d", income.tract@geography$county),sprintf("%05s", income.tract@geography$tract), sep="")
names(tx.trincome)<-c("medhhincome", "tractfips")
tx.trincome$cofips<-substr(tx.trincome$tractfips, 1,5)
head(tx.trincome)

##                                              medhhincome   tractfips
## Census Tract 9501, Anderson County, Texas          46027 48001950100
## Census Tract 9504.01, Anderson County, Texas       61442 48001950401
## Census Tract 9504.02, Anderson County, Texas       61538 48001950402
## Census Tract 9505, Anderson County, Texas          32529 48001950500
## Census Tract 9506, Anderson County, Texas          40409 48001950600
## Census Tract 9507, Anderson County, Texas          30245 48001950700
##                                              cofips
## Census Tract 9501, Anderson County, Texas     48001
## Census Tract 9504.01, Anderson County, Texas  48001
## Census Tract 9504.02, Anderson County, Texas  48001
## Census Tract 9505, Anderson County, Texas     48001
## Census Tract 9506, Anderson County, Texas     48001
## Census Tract 9507, Anderson County, Texas     48001

#sum incomes up to the county level
tx.coicome<-tapply(tx.trincome$medhhincome, tx.trincome$cofips, sum, na.rm=T)
tx.coicome<-data.frame(cofips=rownames(tx.coicome), countyincome=unlist(tx.coicome))
head(tx.coicome)

##       cofips countyincome
## 48001  48001       478713
## 48003  48003       212760
## 48005  48005       689433
## 48007  48007       221228
## 48009  48009       160656
## 48011  48011        60530

mergetractcodat<-merge(tx.trincome, tx.coicome, by.x="cofips", by.y="cofips", all.x=T)

mergetractcodat$difftrco<-mergetractcodat$medhhincome-mergetractcodat$countyincome

#here I find the average tract median household income within a county
meanincs<-unlist(tapply(as.numeric(as.character(mergetractcodat$medhhincome)), mergetractcodat$cofips, mean, na.rm=T))
meanincs<-data.frame(ids=as.character(names(meanincs)), comeaninc=meanincs)

#merge tract data to average county income
mergetractcodat2<-merge(mergetractcodat, meanincs, by.x="cofips", by.y="ids", all.x=T, sort=F)
#calculate the difference between each tract income and the county average
mergetractcodat2$dev<-mergetractcodat2$medhhincome-mergetractcodat2$comeaninc
mergetractcodat2$dev<-ifelse(is.na(mergetractcodat2$dev)==T, 0, mergetractcodat2$dev)
#you could write this out and merge it to a tract shapefile in arcmap, or just do it in R

#Texas county shapefile
data("texas.tract10")
datna<-texas.tract10[,c(1:5)]
head(datna@data)

##                 state county  tract        fips P0010001
## texas.tract10_0    48    113 014133 48113014133     3909
## texas.tract10_1    48    113 980000 48113980000        0
## texas.tract10_2    48    113 014134 48113014134     4065
## texas.tract10_3    48    113 014135 48113014135     4451
## texas.tract10_4    48    113 014131 48113014131     3811
## texas.tract10_5    48    113 014132 48113014132     2169

datna@data<-merge(datna@data,mergetractcodat2,by.x="fips", by.y="tractfips", all.x=T, sort=F  )
head(datna@data)

##          fips state county  tract P0010001 cofips medhhincome countyincome
## 1 48113014133    48    113 014133     3909  48113       38344     28835006
## 2 48113980000    48    113 980000        0  48113          NA     28835006
## 3 48113014134    48    113 014134     4065  48113      124196     28835006
## 4 48113014135    48    113 014135     4451  48113       74583     28835006
## 5 48113014131    48    113 014131     3811  48113       53036     28835006
## 6 48113014132    48    113 014132     2169  48113       69048     28835006
##    difftrco comeaninc        dev
## 1 -28796662  54715.38 -16371.381
## 2        NA  54715.38      0.000
## 3 -28710810  54715.38  69480.619
## 4 -28760423  54715.38  19867.619
## 5 -28781970  54715.38  -1679.381
## 6 -28765958  54715.38  14332.619

So, again that step was just putting our data in the right format, tracts merged with county information. Now we can calculate the GNSI. Again, this is a custom written function and it works, don’t tinker too much unless you have to.

GNSI<-function(datna){
  ncos<-length(unique(datna$county))
  counties<-unique(datna$county)
  res<-numeric(ncos)
  
  for (i in 1:ncos){
    #see if there is 1 tract in county == 0 spatial weight
    if(length(datna@polygons[datna$county==counties[i]])==1){
      res[i]<-0
    }  else #see if there are 2 tracst in county == .5 spatial weight
      if(length(datna@polygons[datna$county==counties[i]])==2){
        wts<-matrix(c(.5,.5,.5,.5), ncol=2, nrow=2)
        y<-data.matrix(as.numeric(as.character(datna$dev[datna$county==counties[i]])))
        y<-ifelse(is.na(y),0,y)
        num<-t(y)%*%t(wts)%*%wts%*%y
        denom<-t(y)%*%y
        res[i]<-(num/denom)^.5   
      }  else #if there are more than 2 tracts, I calculate the full blown queen contiguity matrix
        if(length(datna@polygons[datna$county==counties[i]])>2){
          nbs<-poly2nb(datna[datna$county==counties[i],],queen=T) 
          wts<-data.matrix(nb2mat(nbs, style="B", zero.policy=T))
          ntr<-length(datna@polygons[datna$county==counties[i]])
          diag(wts)<-1
          wts<-wts/apply(wts, 1, sum)
          y<-data.matrix(as.numeric(as.character(datna$dev[datna$county==counties[i]])))
          y<-ifelse(is.na(y),0,y)
          num<-t(y)%*%t(wts)%*%wts%*%y
          denom<-t(y)%*%y
          res[i]<-(num/denom)^.5
          
        }
  #  print(i) #this prints the progress
    list(result=res)
  }
  res1<-numeric(ncos)
  for(i in 1:ncos){ res1[i]<-length(datna@polygons[datna$county==counties[i]]); res1[i]}
  
  countydat<-data.frame(county=counties, gnsi=res, ntracts=res1)
  return(countydat)
}

mycountyGNSI<-GNSI(datna)
mycountyGNSI$cofips<-paste("48",mycountyGNSI$county, sep="")
head(mycountyGNSI)

##   county         gnsi ntracts cofips
## 1    113 7.330552e-01     529  48113
## 2    227 4.134520e-01      10  48227
## 3    155 0.000000e+00       1  48155
## 4    153 0.000000e+00       2  48153
## 5    043 2.342210e-15       3  48043
## 6    159 4.855904e-01       3  48159

hist(mycountyGNSI$gnsi)

So, that gets us a bunch of stuff, now I merge these various indices into a single data frame so I can merge it to a shapefile:

#assemble the indices into a dataframe with nice names
county_dat<-data.frame(cofips=names(unlist(dissim.wb.tr)), dissim_wb=unlist(dissim.wb.tr), isolation_b=isol.b.tr, interaction_bw=int.wb.tr, TheileH=hindex)
county_dat<-merge(x=county_dat, y=Spprox, by="cofips", all.x=T, sort=F)
county_dat<-merge(x=county_dat, y=mycountyGNSI[,-1], by="cofips", all.x=T, sort=F)

#have a look at the indices
head(county_dat)

##   cofips dissim_wb isolation_b interaction_bw    TheileH    spsegr
## 1  48001 0.4377915 0.307966255      0.5198444 0.11995495 0.8965521
## 2  48003 0.2227811 0.018252381      0.7840112 0.01286551 0.8163956
## 3  48005 0.4646370 0.290047066      0.5427863 0.13729605 0.9118763
## 4  48007 0.2247782 0.018043037      0.8582850 0.01428389 0.8893582
## 5  48009 0.3273754 0.006946279      0.9498711 0.03167802 0.9576126
## 6  48013 0.1639783 0.008933463      0.8404121 0.01313291 0.8602982
##        gnsi ntracts
## 1 0.5544800      11
## 2 0.1046631       4
## 3 0.3894168      17
## 4 0.1582254       6
## 5 0.4966145       3
## 6 0.3454375       8

#get a numeric summary of them
summary(county_dat[, 2:7])

##    dissim_wb       isolation_b      interaction_bw      TheileH        
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.0000   Min.   :0.000000  
##  1st Qu.:0.1054   1st Qu.:0.01492   1st Qu.:0.6910   1st Qu.:0.005786  
##  Median :0.2642   Median :0.05619   Median :0.7837   Median :0.031409  
##  Mean   :0.2586   Mean   :0.09986   Mean   :0.7569   Mean   :0.046053  
##  3rd Qu.:0.4034   3rd Qu.:0.14921   3rd Qu.:0.8671   3rd Qu.:0.069289  
##  Max.   :0.7229   Max.   :0.55264   Max.   :0.9499   Max.   :0.335423  
##                                                                        
##      spsegr            gnsi       
##  Min.   :0.7126   Min.   :0.0000  
##  1st Qu.:0.8530   1st Qu.:0.0000  
##  Median :0.8888   Median :0.2950  
##  Mean   :0.8834   Mean   :0.2807  
##  3rd Qu.:0.9179   3rd Qu.:0.4963  
##  Max.   :1.0563   Max.   :0.8226  
##  NA's   :45       NA's   :1

#Now we need to do some visualization of the indices
#load a US County Shapefile

#the texas county SF1 data is in texas.county10, it also has the county polygons
data("texas.county10")

#I only want the identifiers, so I get rid of the SF1 data
tx_sub<-texas.county10[, c(1:6)]
#I put the attribute table into a new object

#I merge the county attribute table to the segregation indices by county FIPS code. I am careful here not to re-sort the data, they need to be in the same order as the shapefile, and I want to keep all counties, even if they're missing the indices (which none are in this case)
tx_sub@data<-merge(x=tx_sub@data, y=county_dat, by.x="fips", by.y="cofips", all.x=T, sort=F)

Now, I map some of the indices:

#Next i plot the indices, using quantile breaks, and using a nice color ramp from ColorBrewer
cols<-brewer.pal(n=5, name="Reds")
spplot(obj=tx_sub, zcol="dissim_wb",at=quantile(tx_sub$dissim_wb, probs=c(0,.25, .5, .75, 1)), col.regions=cols , main="White-Black Dissimiliarity, 2010")

spplot(obj=tx_sub, zcol="TheileH",at=quantile(tx_sub$TheileH, probs=c(0,.25, .5, .75, 1)), col.regions=cols , main="Three-Race Theile H Index, 2010")

spplot(obj=tx_sub, zcol="spsegr",at=quantile(tx_sub$spsegr, probs=c(0,.25, .5, .75, 1), na.rm=T), col.regions=cols , main="White-Black Spatial Proximity Index, 2010")

spplot(obj=tx_sub, zcol="gnsi",at=quantile(tx_sub$gnsi, probs=c(seq(0,1,length.out = 7)), na.rm=T), col.regions=brewer.pal(n=7, name="Reds") , main="General Neighborhood Sorting Index, 2010")

You could of course, export these or merge them into a individual level data file to include in a hierarchical model. I did this for my coauthors in this and this paper.

DEM 7263 Segregation Measures

Corey S. Sparks, Ph.D.

October 6, 2015

Some background on segregation

Doing this in R