In this project, I attempt to compute deprivation scores among Black rural Texans. I then want to examine the distance between the most deprived tracts in the state. In order to find out which counties have the highest percent of Black deprivation, I pull data from ACS 2019:
First, I pull Census api data.
Then, I call for ACS 2019 data.
#https://cran.r-project.org/web/packages/censusapi/vignettes/getting-started.html
#https://cran.r-project.org/web/packages/censusapi/censusapi.pdf
#install.packages("censusapi")
library(censusapi)
##
## Attaching package: 'censusapi'
## The following object is masked from 'package:methods':
##
## getFunction
apis <- listCensusApis()
sahie_vars <- listCensusMetadata(
name = "acs/acs5",
vintage = 2019)
#sahie_vars[grep(x = sahie_vars$label, "diploma"), c("name", "label", "concept")]
#sahie_vars[grep(x = sahie_vars$label, "employ"), c("name", "label", "concept")]
#sahie_vars[grep(x = sahie_vars$label, "Hispanic"), c("name", "label", "concept")]
#sahie_vars[grep(x = sahie_vars$label, "Black"), c("name", "label", "concept")]
#sahie_vars[grep(x = sahie_vars$label, "place"), c("name", "label", "concept")]
I look for my data variables from ACS 5yr 2019.
Composing the index requires the following variables (Retrieved from: https://towardsdatascience.com/a-census-based-deprivation-index-using-r-7aa738da697c)
% with less than HS degree (25 years and over) % in below poverty level % of female-headed households with children under 18 % in management, science, and arts occupation % in crowded households (greater than 1 occupant per room) % with public assistance or food stamps % unemployed (16–64 years old in labor force) % with less than 30K annual household income
I compute this index for Black populations exclusively.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
sahie_national <- getCensus(
name = "acs/acs5",
vars = c("NAME","B19101B_001E","B19101B_002E","B19101B_003E", "B19101B_004E", "B19101B_005E", "B19101B_006E","B25014B_001E","B25014B_003E","B17020B_001E",
"B17020B_002E","B17010B_001E","B17010B_017E","B17010B_041E","B22005B_001E", "B22005B_002E","C23002B_001E", "C23002B_008E", "C23002B_021E","C15002B_001E",
"C15002B_003E","C15002B_008E","C24010B_001E","C24010B_003E","C24010B_009E","B03003_002E","B03003_003E","B03002_004E","B03002_014E"),
vintage = 2019,
region = "tract:*",
regionin = "state:48")
tab1 <- sahie_national %>%
mutate(Totalpop=B03003_002E+B03003_003E,
Blackpop=B03002_004E+B03002_014E,
PrBlacpo=Blackpop/Totalpop,
Prles30K=(B19101B_002E+B19101B_003E+B19101B_004E+B19101B_005E+B19101B_006E)/B19101B_001E,
Prcrowdd=B25014B_003E/B25014B_001E,
Prpovert=B17020B_002E/B17020B_001E,
Prfmhh18=(B17010B_017E+B17010B_041E)/B17010B_001E,
Prpubfst=B22005B_002E/B22005B_001E,
Prum1664=(C23002B_008E+C23002B_021E)/C23002B_001E,
Prleshsd=(C15002B_003E+C15002B_008E)/C15002B_001E,
Prmnscar=(C24010B_003E+C24010B_009E)/C24010B_001E)%>%
filter(state == 48)%>%
select(state,county,tract,NAME,Totalpop,Blackpop,PrBlacpo,Prles30K,Prcrowdd,Prpovert,Prfmhh18,Prpubfst,Prum1664,Prleshsd,Prmnscar)
tab1$Prles30Ksc <- scale(tab1$Prles30K, center = TRUE, scale = TRUE)
tab1$Prcrowddsc <- scale(tab1$Prcrowdd, center = TRUE, scale = TRUE)
tab1$Prpovertsc <- scale(tab1$Prpovert, center = TRUE, scale = TRUE)
tab1$Prfmhh18sc <- scale(tab1$Prfmhh18, center = TRUE, scale = TRUE)
tab1$Prpubfstsc <- scale(tab1$Prpubfst, center = TRUE, scale = TRUE)
tab1$Prum1664sc <- scale(tab1$Prum1664, center = TRUE, scale = TRUE)
tab1$Prleshsdsc <- scale(tab1$Prleshsd, center = TRUE, scale = TRUE)
tab1$Prmnscarsc <- scale(tab1$Prmnscar, center = TRUE, scale = TRUE)
values <- tab1 %>% select(Prles30Ksc,Prcrowddsc,Prpovertsc,Prfmhh18sc,Prpubfstsc,Prum1664sc,Prleshsdsc,Prmnscarsc) %>% as.matrix()
values[is.nan(values)] <- 0
ND <- psych::principal(values,nfactors = 1)
NDI <- cbind(tab1, ND$scores)
cntysum <- NDI %>% group_by(county) %>% summarise(sum(Totalpop))
NDII = merge(cntysum, NDI, by.x = "county", by.y = "county", all.x = TRUE)
NDIII <- NDII %>%
#filter(PC1 >= 3) %>%
arrange(desc(PC1))
head(NDIII)
## county sum(Totalpop) state tract NAME
## 1 029 1952843 48 160501 Census Tract 1605.01, Bexar County, Texas
## 2 029 1952843 48 160502 Census Tract 1605.02, Bexar County, Texas
## 3 029 1952843 48 150400 Census Tract 1504, Bexar County, Texas
## 4 215 855176 48 021903 Census Tract 219.03, Hidalgo County, Texas
## 5 029 1952843 48 150800 Census Tract 1508, Bexar County, Texas
## 6 457 21518 48 950500 Census Tract 9505, Tyler County, Texas
## Totalpop Blackpop PrBlacpo Prles30K Prcrowdd Prpovert Prfmhh18 Prpubfst
## 1 4455 500 0.112233446 1 1.0000000 0.864 1.0000000 1
## 2 4572 182 0.039807524 1 1.0000000 1.000 1.0000000 1
## 3 4946 26 0.005256773 1 1.0000000 1.000 1.0000000 1
## 4 5365 34 0.006337372 1 1.0000000 1.000 1.0000000 1
## 5 2763 33 0.011943540 1 0.0000000 1.000 1.0000000 1
## 6 3268 98 0.029987760 1 0.7142857 1.000 0.7142857 1
## Prum1664 Prleshsd Prmnscar Prles30Ksc Prcrowddsc Prpovertsc Prfmhh18sc
## 1 0.9250 0.00000000 0 2.609924 8.4018263 2.673693 3.251456
## 2 0.0000 0.08955224 0 2.609924 8.4018263 3.227745 3.251456
## 3 0.0000 0.00000000 0 2.609924 8.4018263 3.227745 3.251456
## 4 0.0000 0.00000000 0 2.609924 8.4018263 3.227745 3.251456
## 5 0.0000 0.68421053 0 2.609924 -0.3471299 3.227745 3.251456
## 6 0.2875 0.31707317 NaN 2.609924 5.9021245 3.227745 2.096297
## Prpubfstsc Prum1664sc Prleshsdsc Prmnscarsc PC1
## 1 3.528078 9.4775136 -0.6789891 -1.21098 5.360772
## 2 3.528078 -0.4934242 -0.1310432 -1.21098 4.867572
## 3 3.528078 -0.4934242 -0.6789891 -1.21098 4.771217
## 4 3.528078 -0.4934242 -0.6789891 -1.21098 4.771217
## 5 3.528078 -0.4934242 3.5075098 -1.21098 4.588126
## 6 3.528078 2.6056511 1.2610958 NaN 4.524232
NDIII$GEOID <- as.numeric(paste(NDIII$state,NDIII$county,NDIII$tract,sep = ""))
NDIV <- NDIII %>% select(GEOID, PC1) %>% arrange(desc(PC1))
#countyND <- function(state,county)
#NDI <-countyND("TX","Falls")
#ggplot(NDI, aes(PC1)) + geom_histogram() + theme_classic()
The most deprived tracts with Black populations are both in Bexar County.
#library(data.table)
#library(kableExtra)
#
#
#NDIII %>%
#kbl(caption = "Poverty Status in Texas' Majority Black Rural Cities")%>%
##add_header_above(c(" ", "Counts" = 3, "Percents" = 2),bold = T) %>%
#kable_classic_2(full_width = T, html_font = "Times",font_size = 11) %>%
#column_spec(1,width = "1.5in")
Now, I want to map deprivation in Bexar county.
library(tidycensus)
library(tidyverse)
library(sf)
library(ggplot2)
library(mapview)
library(RColorBrewer)
library(tmap)
library(tmap)
library(tmaptools)
library(classInt)
library(patchwork)
v15_Profile <- load_variables(2019 , "acs5/profile", cache = TRUE) #demographic profile tables
#Search for variables by keywords in the label
#v15_Profile[grep(x = v15_Profile$label, "POVERTY"), c("name", "label")]
#v15_Profile[grep(x = v15_Profile$label, "Black"), c("name", "label", "concept")]
#v15_Profile[grep(x = v15_Profile$label, "Black"), c("name", "label")]
#v15_Profile[grep(x = v15_Profile$label, "POVERTY"), c("name", "label", "concept")]
#head(v15_Profile)
Here is the map.
#https://spatialanalysis.github.io/lab_tutorials/4_R_Mapping.html
Bexar_acs <-get_acs(geography = "tract",
state="TX",
county = c("Bexar"),
year = 2019,
variables=c( "DP03_0119PE",
"DP05_0078P","DP05_0078") ,
geometry = T, output = "wide")
## Getting data from the 2015-2019 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|========== | 14%
|
|========== | 15%
|
|============= | 19%
|
|============== | 20%
|
|====================== | 31%
|
|========================= | 36%
|
|================================= | 48%
|
|=================================== | 51%
|
|==================================== | 51%
|
|======================================= | 56%
|
|======================================== | 57%
|
|================================================= | 70%
|
|==================================================== | 74%
|
|======================================================= | 79%
|
|========================================================= | 81%
|
|======================================================================| 100%
#create a county FIPS code - 5 digit
Bexar_acs$county<-substr(Bexar_acs$GEOID, 1, 5)
Bexar_acs$tract <-substr(Bexar_acs$NAME, 7, 14)
#rename variables and filter missing cases
Bexar_acs2V<-Bexar_acs%>%
mutate(pblack= DP05_0078PE, ppov=DP03_0119PE) %>%
# st_transform(crs = 102740)%>%
na.omit()
Bexar_acs2VII = merge(Bexar_acs2V, NDIV, by.x = "GEOID", by.y = "GEOID", all.x = TRUE)
bexpov <- tm_shape(Bexar_acs2VII) +
tm_fill("PC1", title="Black Deprivation by Tract", palette="BuPu") +
tm_borders(col="black",lwd=.85,lty=1)+#lwd = thickness, lty = line type (1-solid,2-dash,3-dotted)
#tm_text("PC1", size = .85, col = "black", fontface = "bold", remove.overlap = TRUE)+
tm_legend(show=TRUE)+
tm_layout(title = "Black Deprivation in Bexar County by Tract", title.size = 1.8, title.position = c("right","bottom"))+
tm_layout(frame=FALSE)+
tm_layout(legend.outside=TRUE)+
tm_scale_bar()+
tm_compass()
bexpov
#bexpov_bytract <- tm_shape(Bexar_acs2VII) +
#tm_fill("PC1", title="Black Deprivation by Tract", palette="BuPu") +
#tm_borders(col="black",lwd=.85,lty=1)+ #lwd = thickness, lty = line type (1-solid,2-dash,3-dotted)
#tm_text("DP05_0078E", size = .85, col = "black", fontface = "bold", remove.overlap = TRUE)+
#tm_facets(by = "tract")+
#tm_layout(legend.position = c("right", "bottom"))
#library(grid)
#grid.newpage()
#pushViewport(viewport(layout = grid.layout(2, 2, heights = unit(c(0.5, 5, 5), "null"))))
#print(bexpov, vp=viewport(layout.pos.row = 2, layout.pos.col = 1))
##print(bexpov_bytract, vp=viewport(layout.pos.row = 2, layout.pos.col = 2))
#grid.text("Black Deprivation by County (ACS 2019)", vp = viewport(layout.pos.row = 1, layout.pos.col = 1:2))
Now, I will to find the coordinate system.
st_crs(Bexar_acs2VII)
## Coordinate Reference System:
## User input: NAD83
## wkt:
## GEOGCRS["NAD83",
## DATUM["North American Datum 1983",
## ELLIPSOID["GRS 1980",6378137,298.257222101,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["latitude",north,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["longitude",east,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4269]]
Now I will re-project this accordingly.
new_sa<-st_transform(Bexar_acs2VII, crs = 2278)
#Extract two tracts
twtr<-new_sa%>%
filter(GEOID %in% c(48029160501, 48029160502))
# get centroid coordinates for two tracts
tr_co<-st_centroid(twtr)
## Warning in st_centroid.sf(twtr): st_centroid assumes attributes are constant
## over geometries of x
#Measure feet apart
st_distance(tr_co)
## Units: [US_survey_foot]
## [,1] [,2]
## [1,] 0.000 3102.851
## [2,] 3102.851 0.000
The distance between both tracts looks to be a little more than 3100 feet apart.
vnew_sa<-st_transform(Bexar_acs2VII, crs = 4269)
#Extract two tracts
vtwtr<-vnew_sa%>%
filter(GEOID %in% c(48029160501, 48029160502))
# get centroid coordinates for two tracts
vtr_co<-st_centroid(vtwtr)
## Warning in st_centroid.sf(vtwtr): st_centroid assumes attributes are constant
## over geometries of x
## Warning in st_centroid.sfc(st_geometry(x), of_largest_polygon =
## of_largest_polygon): st_centroid does not give correct centroids for longitude/
## latitude data
#Measure feet apart
st_distance(vtr_co)
## Units: [m]
## [,1] [,2]
## [1,] 0.0000 945.8761
## [2,] 945.8761 0.0000
The distance between the two points with the South Central Texas projection is relatively similar, but still notable different. The South Central Texas projection computed a distance of 3102.851 feet, whereas the NAD83 layer resulted in a difference of 945.8761 meters (coverts to 3103.25 feet). Still, projecting other layers to this shapefile can be risky given that the reference coordinates differ across systems.