Blog Post Instructions

Description of data and GIS process. Describe your data, including the source and origin, as well as a plan for what GIS operations you will conducting during the course of your project.

Identifying Gentrification in San Antonio, Bexar County with GIS

This report examines American Community Survey (ACS) 5-year estimates by the U.S. Census Bureau on race/ethnicity, level of education, and income by census tract in Bexar County between 2005 and 2021 to investigate the following research questions:

Using change mapping techniques, four socioeconomic indicators (i.e. racial/ethnic composition, educational attainment, household poverty status, and median household income level) will be traced across time between 2005-09 and 2015-20 5-year data collection spans. This study will compare 2000-05 estimates to the 2016-2020 estimates of the aforementioned socioeconomic indicators for census tracts in Bexar County, TX.

A secondary comparative analysis of data in the Housing Affordability Data System (HADS) published by the U.S. Department of Housing and Development will compliment the GIS processes accomplished in this study. This report will compare housing cost burden (a household’s monthly housing cost divided by its monthly income) for owner and renter households across 2000 and 2013 to examine housing affordability and cost burden trends for households in Bexar County.

#load libraries
library(tidycensus)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.1.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(sf)
## Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1; sf_use_s2() is TRUE
library(tmap)

#install api key
census_api_key(key= "d42eebdfb8a5be15b37eb8cef2a3abc37a71f12b", install=T, overwrite=T)
## Your original .Renviron will be backed up and stored in your R HOME directory if needed.
## Your API key has been stored in your .Renviron and can be accessed by Sys.getenv("CENSUS_API_KEY"). 
## To use now, restart R or run `readRenviron("~/.Renviron")`

The following code will be used to extract ACS data from the Census Bureau using a researcher-unique API key:

#download acs data from census api 2005-20

v2009_Profile = load_variables(2009,
                       "acs5/profile",
                       cache=T)

v2010_Profile = load_variables(2010,
                               "acs5/profile",
                               cache=T)

v2011_Profile = load_variables(2011,
                               "acs5/profile",
                               cache=T)

v2012_Profile = load_variables(2012,
                               "acs5/profile",
                               cache=T)

v2013_Profile = load_variables(2013,
                               "acs5/profile",
                               cache=T)

v2014_Profile = load_variables(2014,
                               "acs5/profile",
                               cache=T)

v2015_Profile = load_variables(2015,
                               "acs5/profile",
                               cache=T)

v2016_Profile = load_variables(2016,
                               "acs5/profile",
                               cache=T)

v2017_Profile = load_variables(2017,
                               "acs5/profile",
                               cache=T)

v2018_Profile = load_variables(2018,
                               "acs5/profile",
                               cache=T)

v2019_Profile = load_variables(2019,
                               "acs5/profile",
                               cache=T)

v2020_Profile = load_variables(2020,
                               "acs5/profile",
                               cache=T)
#search for variables using grep()
v2009_Profile[grep(x = v2009_Profile$label,
                   "income",
                   ignore.case = T),
              c("name", "label")]

v2020_Profile[grep(x = v2020_Profile$label,
                   "income",
                   ignore.case = T),
              c("name", "label")]

Below is an example of the change mapping technique applied to household median income for comparing 2005-09 estimates to that of 2016-20. Here, estimates for household median income by tract are derived from the 2005-09 ACS and compared to the estimates from the 2016-20 ACS.

#extracting acs summary file data profile variables from 2005 and 2020.
inc_09 = get_acs(geography = "tract",
                state = "TX",
                county = "Bexar County",
                year = 2009,
                variables = "DP03_0063",
                geometry = T,
                output = "wide") #DP03_0119P for 2016-20 time period
## Getting data from the 2005-2009 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
#rename variables and filter missing cases
inc_09 = inc_09 %>%
  mutate(p.inc_09 = DP03_0063E,
         p.inc_09.er = DP03_0063M/1.645,
         p.inc_09.cv = 100*(p.inc_09.er/p.inc_09)) %>%
  filter(complete.cases(p.inc_09), is.finite(p.inc_09.cv)==T)%>%
  select(GEOID, p.inc_09, p.inc_09.er, p.inc_09.cv)

head(inc_09)
tm_shape(inc_09)+
  tm_polygons(c("p.inc_09"),
              title=c("Median Income Level"),
              palette="PuBuGn",
              style="quantile",
              n=7)+
  tm_scale_bar()+
  tm_layout(title="Household Median Income, Bexar County, Texas - Quantile Breaks",
            title.size =1.5,
            legend.frame = TRUE,
            title.position = c('right', 'top'))+
  tm_compass()+
  tm_format("World",
            legend.position =  c("left", "bottom"),
            main.title.position =c("center"))

inc_20 = get_acs(geography = "tract",
                state = "TX",
                county = "Bexar County",
                year = 2020,
                variables = "DP03_0062",
                geometry = T,
                output = "wide") #DP03_0119P for 2016-20 time period
## Getting data from the 2016-2020 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
#rename variables and filter missing cases
inc_20 = inc_20 %>%
  mutate(p.inc_20 = DP03_0062E,
         p.inc_20.er = DP03_0062M/1.645,
         p.inc_20.cv = 100*(p.inc_20.er/p.inc_20)) %>%
  filter(complete.cases(p.inc_20), is.finite(p.inc_20.cv)==T)%>%
  select(GEOID, p.inc_20, p.inc_20.er, p.inc_20.cv)

head(inc_20)
#merge data
st_geometry(inc_20)=NULL

mdat = left_join(inc_09, inc_20, by=c("GEOID"="GEOID"))

head(mdat)
acstest = function(names, geoid, est1, err1, est2, err2, alpha, yr1, yr2, span)
  {
  se1 = err1/qnorm(.90)
  se2 = err2/qnorm(.90)
  yrs1 = seq(yr1, to=yr1-span)
  yrs2 = seq(yr2, to=yr2-span)

  C = mean(yrs2%in%yrs1)
  diff = (est1-est2)
  test = (est1-est2) / (sqrt(1-C)*sqrt(se1^2+se2^2))
  crit = qnorm(1-alpha/2)
  pval = 1-pnorm(abs(test))
  result = NULL
  result[pval > alpha] = "insignificant change"
  result[pval < alpha & test < 0] = "significant increase"
  result[pval < alpha & test > 0] = "significant decrease" 
  
  data.frame(name=names,geoid=geoid, est1=est1, est2=est2, se1=se1, se2=se2,difference=diff, test=test, result=result, pval=pval)
}
mdat$signif = significance(est1=mdat$p.inc_09,
                           est2=mdat$p.inc_20,
                           moe1=mdat$p.inc_09.er,
                           moe2 = mdat$p.inc_20.er,
                           clevel = .9)

diff09_20 = acstest(names = mdat$GEOID,
                    geoid = mdat$GEOID,
                    est1 = mdat$p.inc_09,
                    est2 = mdat$p.inc_20,
                    err1 = mdat$p.inc_09.er,
                    err2=mdat$p.inc_20.er,
                    alpha = .1,
                    yr1 = 2009, yr2 = 2020,
                    span = 5)

table(diff09_20$result)
acs_merge = left_join(mdat, diff09_20, by=c("GEOID"="geoid"))

tmap_mode("plot")
## tmap mode set to plotting
map_09 = tm_shape(acs_merge)+
  tm_polygons(c("p.inc_09"),
              title=c("Median Income Level"),
              palette="PuBuGn",
              style="quantile",
              n=7)+
  tm_scale_bar()+
  tm_layout(title="Household Median Income, Bexar County, TX 2009",
            title.size =1.5,
            
            legend.frame = TRUE,
            title.position = c('right', 'top'))+
  tm_compass()+
  tm_format("World",
            legend.position =  c("left", "bottom"),
            main.title.position =c("center"))+
  tm_credits("Source: 2005-09 ACS 5-Year Estimates")

map_20 = tm_shape(acs_merge)+
  tm_polygons(c("p.inc_20"),
              title=c("Median Income Level"),
              palette="PuBuGn",
              style="quantile",
              n=7)+
  tm_scale_bar()+
  tm_layout(title="Household Median Income, Bexar County, Texas 2020",
            title.size =1.5,
            legend.frame = TRUE,
            title.position = c('right', 'top'))+
  tm_compass()+
  tm_format("World",
            legend.position =  c("left", "bottom"),
            main.title.position =c("center"))+
   tm_credits("Source: 2016-20 ACS 5-Year Estimates")

map_merge = tm_shape(acs_merge)+
  tm_polygons(c("result"),
              title=c("Change"),
              palette="Spectral")+
  tm_layout(title="Change in Household Median Income, Bexar County, Texas 2009 vs. 2020",
            title.size = 1.5,
            legend.frame = T,
            title.position = c('right', 'top'))+
  tm_scale_bar()+
  tm_compass()

tmap_arrange(map_09, map_20, map_merge, nrow = 2)