install.packages("censusapi"); install.packages("tidycensus"); install.packages("sf")

# Attach the packages used to query the Census API and to handle spatial data.
library(censusapi)
## 
## Attaching package: 'censusapi'
## The following object is masked from 'package:methods':
## 
##     getFunction
library(tidycensus)
library(sf)
## Linking to GEOS 3.6.1, GDAL 2.2.3, proj.4 4.9.3
# Read the Census API key from the environment rather than embedding a
# credential in the script. Set it once per machine, e.g. in ~/.Renviron:
#   CENSUS_API_KEY=<your key>
# NOTE(review): any key previously committed in this file should be treated
# as exposed and regenerated.
api_key <- Sys.getenv("CENSUS_API_KEY")
if (!nzchar(api_key)) {
  stop(
    "Set the CENSUS_API_KEY environment variable before running this script.",
    call. = FALSE
  )
}
census_api_key(key = api_key)
## To install your API key for use in future sessions, run this function with `install = TRUE`.

Look at available ACS variables

# Load the variable dictionaries for the 2015 ACS 5-year release so variable
# codes can be looked up by their labels (cache = TRUE requests caching).
v15_Profile <- load_variables(2015, "acs5/profile", cache = TRUE) # demographic profile tables
v15_tables <- load_variables(2015, "acs5", cache = TRUE) # all tables
# v10_sf1_tables <- load_variables(2010, "sf1", cache = TRUE) # all tables for 2010 SF1
# v00_sf3_tables <- load_variables(2000, "sf3", cache = TRUE) # all tables for 2000 SF3

# View() opens a data viewer, so only call it when a user is at the console;
# this keeps non-interactive runs (Rscript, knitting) from stopping here.
if (interactive()) {
  View(v15_Profile)
}

# Search for variables by label text, showing only the code and the label.
v15_Profile[grepl("Hispanic", v15_Profile$label), c("name", "label")]
## # A tibble: 34 x 2
##    name          label                                                    
##    <chr>         <chr>                                                    
##  1 DP02_0149E    Estimate!!ANCESTRY!!Total population!!West Indian (exclu~
##  2 DP02_0149PE   Percent!!ANCESTRY!!Total population!!West Indian (exclud~
##  3 DP02PR_0149E  Estimate!!ANCESTRY!!Total population!!West Indian (exclu~
##  4 DP02PR_0149PE Percent!!ANCESTRY!!Total population!!West Indian (exclud~
##  5 DP05_0066E    Estimate!!HISPANIC OR LATINO AND RACE!!Total population!~
##  6 DP05_0066PE   Percent!!HISPANIC OR LATINO AND RACE!!Total population!!~
##  7 DP05_0067E    Estimate!!HISPANIC OR LATINO AND RACE!!Total population!~
##  8 DP05_0067PE   Percent!!HISPANIC OR LATINO AND RACE!!Total population!!~
##  9 DP05_0068E    Estimate!!HISPANIC OR LATINO AND RACE!!Total population!~
## 10 DP05_0068PE   Percent!!HISPANIC OR LATINO AND RACE!!Total population!!~
## # ... with 24 more rows
# Same lookup for the housing-age category used later in the analysis.
v15_Profile[grepl("Built 2000 to 2009", v15_Profile$label), c("name", "label")]
## # A tibble: 2 x 2
##   name        label                                                       
##   <chr>       <chr>                                                       
## 1 DP04_0019E  Estimate!!YEAR STRUCTURE BUILT!!Total housing units!!Built ~
## 2 DP04_0019PE Percent!!YEAR STRUCTURE BUILT!!Total housing units!!Built 2~

Extract 2011-2015 ACS 5-year data profile variables for Harris County, TX census tracts

The data profile tables are very useful because they contain lots of pre-calculated variables.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Download tract-level data-profile variables for Harris County, TX, with
# tract geometry attached, one row per tract (wide output).
ha_acs <- get_acs(
  geography = "tract",
  state = "TX",
  county = "Harris County",
  year = 2015,
  # List each variable code once; percent variables use the PE suffix.
  variables = c(
    "DP05_0001E", "DP03_0009PE", "DP03_0062E", "DP03_0119PE",
    "DP02_0009PE", "DP02_0008PE", "DP02_0040E", "DP02_0038E",
    "DP02_0066PE", "DP02_0067PE", "DP02_0080PE", "DP02_0092PE",
    "DP03_0005PE", "DP03_0028PE", "DP03_0099PE", "DP03_0101PE",
    "DP04_0046PE", "DP05_0072PE", "DP05_0073PE", "DP05_0066PE",
    "DP02_0113PE"
  ),
  summary_var = "B01001_001",
  geometry = TRUE,
  output = "wide"
)
## Getting data from the 2011-2015 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
## Using the ACS Data Profile
# create a county FIPS code - 5 digit (first five characters of the tract GEOID)
ha_acs$county <- substr(ha_acs$GEOID, 1, 5)

# copy the percent-Hispanic estimate to a readable name and drop tracts where it is missing
# NOTE(review): dbplyr does not appear to be used here; mutate() and filter() come from dplyr.
library(dbplyr)
## 
## Attaching package: 'dbplyr'
## The following objects are masked from 'package:dplyr':
## 
##     ident, sql
ha_acs2 <- ha_acs %>%
  mutate(phisp = DP05_0066PE) %>%
  filter(!is.na(phisp))

class(ha_acs2)

Extract 2011-2015 ACS 5-year data profile variables for Dallas County, TX census tracts

The data profile tables are very useful because they contain lots of pre-calculated variables.

library(dplyr)
# Download tract-level data-profile variables for Dallas County, TX, with
# tract geometry attached, one row per tract (wide output).
dal_acs <- get_acs(
  geography = "tract",
  state = "TX",
  county = "Dallas County",
  year = 2015,
  # List each variable code once; percent variables use the PE suffix.
  variables = c(
    "DP05_0001E", "DP03_0009PE", "DP03_0062E", "DP03_0119PE",
    "DP02_0009PE", "DP02_0008PE", "DP02_0040E", "DP02_0038E",
    "DP02_0066PE", "DP02_0067PE", "DP02_0080PE", "DP02_0092PE",
    "DP03_0005PE", "DP03_0028PE", "DP03_0099PE", "DP03_0101PE",
    "DP04_0046PE", "DP05_0072PE", "DP05_0073PE", "DP05_0066PE",
    "DP02_0113PE"
  ),
  summary_var = "B01001_001",
  geometry = TRUE,
  output = "wide"
)
## Getting data from the 2011-2015 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
## Using the ACS Data Profile
# create a county FIPS code - 5 digit (first five characters of the tract GEOID)
dal_acs$county <- substr(dal_acs$GEOID, 1, 5)

# copy the percent-Hispanic estimate to a readable name and drop tracts where it is missing
# NOTE(review): dbplyr does not appear to be used here; mutate() and filter() come from dplyr.
library(dbplyr)
dal_acs2 <- dal_acs %>%
  mutate(phisp = DP05_0066PE) %>%
  filter(!is.na(phisp))

class(dal_acs2)

Harris County Housing

library(dplyr)
# Download the percent of housing units built 2000 to 2009 (DP04_0019PE) for
# Harris County, TX census tracts, with tract geometry attached.
ha_housing <- get_acs(
  geography = "tract",
  state = "TX",
  county = "Harris County",
  year = 2015,
  variables = "DP04_0019PE",
  summary_var = "B01001_001",
  geometry = TRUE,
  output = "wide"
)
## Getting data from the 2011-2015 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
# create a county FIPS code - 5 digit (first five characters of the tract GEOID)
ha_housing$county <- substr(ha_housing$GEOID, 1, 5)

# copy the estimate to a readable name and drop tracts where it is missing
# NOTE(review): dbplyr does not appear to be used here; mutate() and filter() come from dplyr.
library(dbplyr)
ha_housing2 <- ha_housing %>%
  mutate(prophousing = DP04_0019PE) %>%
  filter(!is.na(prophousing))

class(ha_housing2)
## [1] "sf"         "data.frame"

Dallas County Housing

library(dplyr)
# Download the percent of housing units built 2000 to 2009 (DP04_0019PE) for
# Dallas County, TX census tracts, with tract geometry attached.
dal_housing <- get_acs(
  geography = "tract",
  state = "TX",
  county = "Dallas County",
  year = 2015,
  variables = "DP04_0019PE",
  summary_var = "B01001_001",
  geometry = TRUE,
  output = "wide"
)
## Getting data from the 2011-2015 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
# create a county FIPS code - 5 digit (first five characters of the tract GEOID)
dal_housing$county <- substr(dal_housing$GEOID, 1, 5)

# copy the estimate to a readable name and drop tracts where it is missing
# NOTE(review): dbplyr does not appear to be used here; mutate() and filter() come from dplyr.
library(dbplyr)
dal_housing2 <- dal_housing %>%
  mutate(prophousing = DP04_0019PE) %>%
  filter(!is.na(prophousing))

class(dal_housing2)
## [1] "sf"         "data.frame"

Write data out to shapefile for Hispanic Population

# Write each county's tract layer to its own shapefile. Using one shared file
# name for both calls would let the second write replace the first.
# change the directory
shp_dir <- "C:/Users/Monica/Documents/GIS Class"
# NOTE(review): newer sf releases replace `update` with `append`; confirm
# against the installed sf version.
sf::st_write(
  ha_acs2,
  dsn = file.path(shp_dir, "harris_tract_hisp.shp"),
  layer = "harris_tract_hisp",
  driver = "ESRI Shapefile",
  delete_layer = TRUE,
  update = TRUE
)
sf::st_write(
  dal_acs2,
  dsn = file.path(shp_dir, "dallas_tract_hisp.shp"),
  layer = "dallas_tract_hisp",
  driver = "ESRI Shapefile",
  delete_layer = TRUE,
  update = TRUE
)

Some basic mapping of variables

Here I generate a quantile break for % Hispanic in census tracts and compare it to a Jenks break

install.packages("classInt"); install.packages("devtools"); devtools::install_github("thomasp85/patchwork"); install.packages("ggsn")

library(classInt)
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source'))`
library(patchwork)


#install.packages("dplyr")
library(dplyr)

# Classify tract-level percent Hispanic into five classes two ways:
#   chisp - quintile breaks (six cut points from the 0..1 probability grid)
#   jhisp - Jenks breaks computed by classIntervals()
# include.lowest = TRUE keeps the minimum value inside the first class.
phisp_map <- ha_acs2 %>%
  mutate(
    chisp = cut(
      phisp,
      breaks = quantile(phisp, probs = seq(0, 1, length.out = 6), na.rm = TRUE),
      include.lowest = TRUE
    ),
    jhisp = cut(
      phisp,
      breaks = classIntervals(var = ha_acs2$phisp, n = 5, style = "jenks")$brks,
      include.lowest = TRUE
    )
  )
# Attaching ggsn also attaches ggplot2, which supplies the plotting functions below.
library(ggsn)
## Loading required package: ggplot2
# Choropleth of the Jenks classes; tract outlines take the fill colour and the
# axis tick labels are hidden.
p1 <- ggplot(phisp_map, aes(fill = jhisp, color = jhisp)) +
  geom_sf() +
  ggtitle(
    "Proportion Hispanic",
    subtitle = "Harris County Texas, 2015 - Jenks Breaks"
  ) +
  scale_fill_brewer(palette = "Blues") +
  scale_color_brewer(palette = "Blues") +
  theme(axis.text.x = element_blank(), axis.text.y = element_blank())

p1

# Classify Dallas County tract-level percent Hispanic into five classes:
#   chisp - quintile breaks, jhisp - Jenks breaks from classIntervals().
# include.lowest = TRUE keeps the minimum value inside the first class.
phisp_map <- dal_acs2 %>%
  mutate(
    chisp = cut(
      phisp,
      breaks = quantile(phisp, probs = seq(0, 1, length.out = 6), na.rm = TRUE),
      include.lowest = TRUE
    ),
    jhisp = cut(
      phisp,
      breaks = classIntervals(var = dal_acs2$phisp, n = 5, style = "jenks")$brks,
      include.lowest = TRUE
    )
  )
library(ggsn)


# Choropleth of the Jenks classes; tract outlines take the fill colour and the
# axis tick labels are hidden.
p2 <- ggplot(phisp_map, aes(fill = jhisp, color = jhisp)) +
  geom_sf() +
  ggtitle(
    "Proportion Hispanic",
    subtitle = "Dallas County Texas, 2015 - Jenks Breaks"
  ) +
  scale_fill_brewer(palette = "Blues") +
  scale_color_brewer(palette = "Blues") +
  theme(axis.text.x = element_blank(), axis.text.y = element_blank())
p2

Write data out to shapefile for Housing

# Write each county's housing layer to its own shapefile. Using one shared
# file name for both calls would let the second write replace the first.
# change the directory
shp_dir <- "C:/Users/Monica/Documents/GIS Class"
# NOTE(review): newer sf releases replace `update` with `append`; confirm
# against the installed sf version.
sf::st_write(
  ha_housing2,
  dsn = file.path(shp_dir, "harris_tract_housing.shp"),
  layer = "harris_tract_housing",
  driver = "ESRI Shapefile",
  delete_layer = TRUE,
  update = TRUE
)
sf::st_write(
  dal_housing2,
  dsn = file.path(shp_dir, "dallas_tract_housing.shp"),
  layer = "dallas_tract_housing",
  driver = "ESRI Shapefile",
  delete_layer = TRUE,
  update = TRUE
)

Some basic mapping of variables

Here I generate a quantile break for the % of housing units built 2000-2009 in census tracts and compare it to a Jenks break

install.packages("classInt"); install.packages("devtools"); devtools::install_github("thomasp85/patchwork"); install.packages("ggsn")

library(classInt)
library(patchwork)

#install.packages("dplyr")
library(dplyr)

# Classify Harris County tract-level percent of housing built 2000-2009 into
# five classes: chousing - quintile breaks, jhousing - Jenks breaks from
# classIntervals(). include.lowest = TRUE keeps the minimum in the first class.
housing_map <- ha_housing2 %>%
  mutate(
    chousing = cut(
      prophousing,
      breaks = quantile(prophousing, probs = seq(0, 1, length.out = 6), na.rm = TRUE),
      include.lowest = TRUE
    ),
    jhousing = cut(
      prophousing,
      breaks = classIntervals(var = ha_housing2$prophousing, n = 5, style = "jenks")$brks,
      include.lowest = TRUE
    )
  )
library(ggsn)


# Choropleth of the Jenks classes; tract outlines take the fill colour and the
# axis tick labels are hidden.
p3 <- ggplot(housing_map, aes(fill = jhousing, color = jhousing)) +
  geom_sf() +
  ggtitle(
    "Proportion Housing Built 2000-2009",
    subtitle = "Harris County Texas, 2015 - Jenks Breaks"
  ) +
  scale_fill_brewer(palette = "Blues") +
  scale_color_brewer(palette = "Blues") +
  theme(axis.text.x = element_blank(), axis.text.y = element_blank())
p3

# Classify Dallas County tract-level percent of housing built 2000-2009 into
# five classes: chousing - quintile breaks, jhousing - Jenks breaks from
# classIntervals(). include.lowest = TRUE keeps the minimum in the first class.
housing_map <- dal_housing2 %>%
  mutate(
    chousing = cut(
      prophousing,
      breaks = quantile(prophousing, probs = seq(0, 1, length.out = 6), na.rm = TRUE),
      include.lowest = TRUE
    ),
    jhousing = cut(
      prophousing,
      breaks = classIntervals(var = dal_housing2$prophousing, n = 5, style = "jenks")$brks,
      include.lowest = TRUE
    )
  )
library(ggsn)


# Choropleth of the Jenks classes; tract outlines take the fill colour and the
# axis tick labels are hidden.
p4 <- ggplot(housing_map, aes(fill = jhousing, color = jhousing)) +
  geom_sf() +
  ggtitle(
    "Proportion Housing Built 2000-2009",
    subtitle = "Dallas County Texas, 2015 - Jenks Breaks"
  ) +
  scale_fill_brewer(palette = "Blues") +
  scale_color_brewer(palette = "Blues") +
  theme(axis.text.x = element_blank(), axis.text.y = element_blank())
p4

Summarize Hispanic population in both counties

# Distribution of tract-level percent Hispanic (min, quartiles, mean, max),
# Harris County first, then Dallas County.
summary(ha_acs2[["phisp"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   20.45   35.65   41.44   60.38   97.40
summary(dal_acs2[["phisp"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   16.20   33.50   37.65   57.00   96.20

Across census tracts, the mean percent Hispanic is 41.44% in Harris County, slightly higher than the 37.65% in Dallas County. (These are unweighted means of tract-level percentages, not county-wide population shares.)

Summarize proportion of housing built between 2000-2009

# Distribution of tract-level percent of housing built 2000-2009 (min,
# quartiles, mean, max), Harris County first, then Dallas County.
summary(ha_housing2[["prophousing"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    3.50   11.65   17.65   26.12   90.60
summary(dal_housing2[["prophousing"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    2.15    6.80   12.75   18.50   83.50

Across census tracts, the mean percent of housing units built between 2000 and 2009 is 17.65% in Harris County, which is higher than the 12.75% in Dallas County. (These are unweighted means of tract-level percentages.)