In this project, I attempt to compute deprivation scores among Black rural Texans. I then want to examine the distance between the most deprived tracts in the state. In order to find out which counties have the highest percent of Black deprivation, I pull data from ACS 2019:

First, I pull Census api data.

Then, I call for ACS 2019 data.

#https://cran.r-project.org/web/packages/censusapi/vignettes/getting-started.html
#https://cran.r-project.org/web/packages/censusapi/censusapi.pdf
#install.packages("censusapi")
library(censusapi)
## 
## Attaching package: 'censusapi'
## The following object is masked from 'package:methods':
## 
##     getFunction
apis <- listCensusApis()

sahie_vars <- listCensusMetadata(
    name = "acs/acs5", 
    vintage = 2019)

#sahie_vars[grep(x = sahie_vars$label, "diploma"), c("name", "label", "concept")]
#sahie_vars[grep(x = sahie_vars$label, "employ"), c("name", "label", "concept")]
#sahie_vars[grep(x = sahie_vars$label, "Hispanic"), c("name", "label", "concept")]
#sahie_vars[grep(x = sahie_vars$label, "Black"), c("name", "label", "concept")]
#sahie_vars[grep(x = sahie_vars$label, "place"), c("name", "label", "concept")]

I look for my data variables from ACS 5yr 2019.

Composing the index requires the following variables (Retrieved from: https://towardsdatascience.com/a-census-based-deprivation-index-using-r-7aa738da697c)

% with less than HS degree (25 years and over) % in below poverty level % of female-headed households with children under 18 % in management, science, and arts occupation % in crowded households (greater than 1 occupant per room) % with public assistance or food stamps % unemployed (16–64 years old in labor force) % with less than 30K annual household income

I compute this index for Black populations exclusively.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
sahie_national <- getCensus(
    name = "acs/acs5",
    vars = c("NAME","B19101B_001E","B19101B_002E","B19101B_003E", "B19101B_004E", "B19101B_005E", "B19101B_006E","B25014B_001E","B25014B_003E","B17020B_001E",
"B17020B_002E","B17010B_001E","B17010B_017E","B17010B_041E","B22005B_001E", "B22005B_002E","C23002B_001E", "C23002B_008E", "C23002B_021E","C15002B_001E",
"C15002B_003E","C15002B_008E","C24010B_001E","C24010B_003E","C24010B_009E","B03003_002E","B03003_003E","B03002_004E","B03002_014E"),
    vintage = 2019,
    region = "tract:*", 
    regionin = "state:48")

tab1 <- sahie_national %>% 
  mutate(Totalpop=B03003_002E+B03003_003E,
         Blackpop=B03002_004E+B03002_014E,
         PrBlacpo=Blackpop/Totalpop,
         Prles30K=(B19101B_002E+B19101B_003E+B19101B_004E+B19101B_005E+B19101B_006E)/B19101B_001E,
         Prcrowdd=B25014B_003E/B25014B_001E,
         Prpovert=B17020B_002E/B17020B_001E,
         Prfmhh18=(B17010B_017E+B17010B_041E)/B17010B_001E,
         Prpubfst=B22005B_002E/B22005B_001E,
         Prum1664=(C23002B_008E+C23002B_021E)/C23002B_001E,
         Prleshsd=(C15002B_003E+C15002B_008E)/C15002B_001E,
         Prmnscar=(C24010B_003E+C24010B_009E)/C24010B_001E)%>% 
filter(state == 48)%>%
  select(state,county,tract,NAME,Totalpop,Blackpop,PrBlacpo,Prles30K,Prcrowdd,Prpovert,Prfmhh18,Prpubfst,Prum1664,Prleshsd,Prmnscar)


tab1$Prles30Ksc <- scale(tab1$Prles30K, center = TRUE, scale = TRUE)
tab1$Prcrowddsc <- scale(tab1$Prcrowdd, center = TRUE, scale = TRUE)
tab1$Prpovertsc <- scale(tab1$Prpovert, center = TRUE, scale = TRUE)
tab1$Prfmhh18sc <- scale(tab1$Prfmhh18, center = TRUE, scale = TRUE)
tab1$Prpubfstsc <- scale(tab1$Prpubfst, center = TRUE, scale = TRUE)
tab1$Prum1664sc <- scale(tab1$Prum1664, center = TRUE, scale = TRUE)
tab1$Prleshsdsc <- scale(tab1$Prleshsd, center = TRUE, scale = TRUE)
tab1$Prmnscarsc <- scale(tab1$Prmnscar, center = TRUE, scale = TRUE)


values  <-  tab1 %>% select(Prles30Ksc,Prcrowddsc,Prpovertsc,Prfmhh18sc,Prpubfstsc,Prum1664sc,Prleshsdsc,Prmnscarsc) %>% as.matrix()
values[is.nan(values)] <- 0
  
ND <- psych::principal(values,nfactors = 1)          
NDI <- cbind(tab1, ND$scores) 

cntysum <- NDI %>% group_by(county) %>% summarise(sum(Totalpop))
NDII = merge(cntysum, NDI, by.x = "county", by.y = "county", all.x = TRUE)
NDIII <- NDII %>% 
  #filter(PC1 >= 3) %>% 
  arrange(desc(PC1)) 

head(NDIII)
##   county sum(Totalpop) state  tract                                       NAME
## 1    029       1952843    48 160501  Census Tract 1605.01, Bexar County, Texas
## 2    029       1952843    48 160502  Census Tract 1605.02, Bexar County, Texas
## 3    029       1952843    48 150400     Census Tract 1504, Bexar County, Texas
## 4    215        855176    48 021903 Census Tract 219.03, Hidalgo County, Texas
## 5    029       1952843    48 150800     Census Tract 1508, Bexar County, Texas
## 6    457         21518    48 950500     Census Tract 9505, Tyler County, Texas
##   Totalpop Blackpop    PrBlacpo Prles30K  Prcrowdd Prpovert  Prfmhh18 Prpubfst
## 1     4455      500 0.112233446        1 1.0000000    0.864 1.0000000        1
## 2     4572      182 0.039807524        1 1.0000000    1.000 1.0000000        1
## 3     4946       26 0.005256773        1 1.0000000    1.000 1.0000000        1
## 4     5365       34 0.006337372        1 1.0000000    1.000 1.0000000        1
## 5     2763       33 0.011943540        1 0.0000000    1.000 1.0000000        1
## 6     3268       98 0.029987760        1 0.7142857    1.000 0.7142857        1
##   Prum1664   Prleshsd Prmnscar Prles30Ksc Prcrowddsc Prpovertsc Prfmhh18sc
## 1   0.9250 0.00000000        0   2.609924  8.4018263   2.673693   3.251456
## 2   0.0000 0.08955224        0   2.609924  8.4018263   3.227745   3.251456
## 3   0.0000 0.00000000        0   2.609924  8.4018263   3.227745   3.251456
## 4   0.0000 0.00000000        0   2.609924  8.4018263   3.227745   3.251456
## 5   0.0000 0.68421053        0   2.609924 -0.3471299   3.227745   3.251456
## 6   0.2875 0.31707317      NaN   2.609924  5.9021245   3.227745   2.096297
##   Prpubfstsc Prum1664sc Prleshsdsc Prmnscarsc      PC1
## 1   3.528078  9.4775136 -0.6789891   -1.21098 5.360772
## 2   3.528078 -0.4934242 -0.1310432   -1.21098 4.867572
## 3   3.528078 -0.4934242 -0.6789891   -1.21098 4.771217
## 4   3.528078 -0.4934242 -0.6789891   -1.21098 4.771217
## 5   3.528078 -0.4934242  3.5075098   -1.21098 4.588126
## 6   3.528078  2.6056511  1.2610958        NaN 4.524232
NDIII$GEOID <- as.numeric(paste(NDIII$state,NDIII$county,NDIII$tract,sep = ""))
NDIV <- NDIII %>% select(GEOID, PC1) %>% arrange(desc(PC1)) 


#countyND <- function(state,county)
#NDI <-countyND("TX","Falls")
#ggplot(NDI, aes(PC1)) + geom_histogram() + theme_classic()

The most deprived tracts with Black populations are both in Bexar County.

#library(data.table)
#library(kableExtra)
#
#
#NDIII %>%
#kbl(caption = "Poverty Status in Texas' Majority Black Rural Cities")%>%
##add_header_above(c(" ", "Counts" = 3, "Percents" = 2),bold = T) %>%
#kable_classic_2(full_width = T, html_font = "Times",font_size = 11) %>%
#column_spec(1,width = "1.5in")

Now, I want to map deprivation in Bexar county.

library(tidycensus)
library(tidyverse)
library(sf)
library(ggplot2)
library(mapview)
library(RColorBrewer)
library(tmap)
library(tmap)
library(tmaptools)
library(classInt)
library(patchwork)


v15_Profile <- load_variables(2019 , "acs5/profile", cache = TRUE) #demographic profile tables

#Search for variables by keywords in the label 
#v15_Profile[grep(x = v15_Profile$label, "POVERTY"), c("name", "label")]
#v15_Profile[grep(x = v15_Profile$label, "Black"), c("name", "label", "concept")]
#v15_Profile[grep(x = v15_Profile$label, "Black"), c("name", "label")]

#v15_Profile[grep(x = v15_Profile$label, "POVERTY"), c("name", "label", "concept")]

#head(v15_Profile)

Here is the map.

#https://spatialanalysis.github.io/lab_tutorials/4_R_Mapping.html

Bexar_acs <-get_acs(geography = "tract",
                state="TX",
                county = c("Bexar"),
                year = 2019,
                variables=c( "DP03_0119PE", 
                            "DP05_0078P","DP05_0078") ,
                geometry = T, output = "wide")
## Getting data from the 2015-2019 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |                                                                      |   1%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |=====                                                                 |   8%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |===================================                                   |  51%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |======================================================================| 100%
#create a county FIPS code - 5 digit
Bexar_acs$county<-substr(Bexar_acs$GEOID, 1, 5)
Bexar_acs$tract <-substr(Bexar_acs$NAME, 7, 14)

#rename variables and filter missing cases
Bexar_acs2V<-Bexar_acs%>%
  mutate(pblack= DP05_0078PE, ppov=DP03_0119PE) %>%
#  st_transform(crs = 102740)%>%
  na.omit()

Bexar_acs2VII = merge(Bexar_acs2V, NDIV, by.x = "GEOID", by.y = "GEOID", all.x = TRUE)


bexpov <- tm_shape(Bexar_acs2VII) +
  tm_fill("PC1", title="Black Deprivation by Tract", palette="BuPu") +
  tm_borders(col="black",lwd=.85,lty=1)+#lwd = thickness, lty = line type (1-solid,2-dash,3-dotted)
  #tm_text("PC1", size = .85, col = "black", fontface = "bold", remove.overlap = TRUE)+
  tm_legend(show=TRUE)+
  tm_layout(title = "Black Deprivation in Bexar County by Tract", title.size = 1.8, title.position = c("right","bottom"))+
  tm_layout(frame=FALSE)+
  tm_layout(legend.outside=TRUE)+
  tm_scale_bar()+
  tm_compass()

bexpov

 #bexpov_bytract <- tm_shape(Bexar_acs2VII) +
 #tm_fill("PC1", title="Black Deprivation by Tract", palette="BuPu") +
 #tm_borders(col="black",lwd=.85,lty=1)+ #lwd = thickness, lty = line type (1-solid,2-dash,3-dotted)
 #tm_text("DP05_0078E", size = .85, col = "black", fontface = "bold", remove.overlap = TRUE)+
 #tm_facets(by = "tract")+  
 #tm_layout(legend.position = c("right", "bottom"))

#library(grid)
#grid.newpage()
#pushViewport(viewport(layout = grid.layout(2, 2, heights = unit(c(0.5, 5, 5), "null"))))   
#print(bexpov, vp=viewport(layout.pos.row = 2, layout.pos.col = 1))
##print(bexpov_bytract, vp=viewport(layout.pos.row = 2, layout.pos.col = 2))
#grid.text("Black Deprivation by County (ACS 2019)", vp = viewport(layout.pos.row = 1, layout.pos.col = 1:2))

Now, I will to find the coordinate system.

find coordinate system of current map

st_crs(Bexar_acs2VII)
## Coordinate Reference System:
##   User input: NAD83 
##   wkt:
## GEOGCRS["NAD83",
##     DATUM["North American Datum 1983",
##         ELLIPSOID["GRS 1980",6378137,298.257222101,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["latitude",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["longitude",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     ID["EPSG",4269]]

Now I will re-project this accordingly.

re-project map into South Central Texas projection

new_sa<-st_transform(Bexar_acs2VII, crs = 2278)

#Extract two tracts
twtr<-new_sa%>%
  filter(GEOID %in% c(48029160501, 48029160502))

# get centroid coordinates for two tracts
tr_co<-st_centroid(twtr)
## Warning in st_centroid.sf(twtr): st_centroid assumes attributes are constant
## over geometries of x
#Measure feet apart
st_distance(tr_co)
## Units: [US_survey_foot]
##          [,1]     [,2]
## [1,]    0.000 3102.851
## [2,] 3102.851    0.000

The distance between both tracts looks to be a little more than 3100 feet apart.

vnew_sa<-st_transform(Bexar_acs2VII, crs = 4269)

#Extract two tracts
vtwtr<-vnew_sa%>%
  filter(GEOID %in% c(48029160501, 48029160502))

# get centroid coordinates for two tracts
vtr_co<-st_centroid(vtwtr)
## Warning in st_centroid.sf(vtwtr): st_centroid assumes attributes are constant
## over geometries of x
## Warning in st_centroid.sfc(st_geometry(x), of_largest_polygon =
## of_largest_polygon): st_centroid does not give correct centroids for longitude/
## latitude data
#Measure feet apart
st_distance(vtr_co)
## Units: [m]
##          [,1]     [,2]
## [1,]   0.0000 945.8761
## [2,] 945.8761   0.0000

The distance between the two points with the South Central Texas projection is relatively similar, but still notable different. The South Central Texas projection computed a distance of 3102.851 feet, whereas the NAD83 layer resulted in a difference of 945.8761 meters (coverts to 3103.25 feet). Still, projecting other layers to this shapefile can be risky given that the reference coordinates differ across systems.