Homework 4

#Find the ACS variable for the Percent Vacant housing units for 2011 and 2019 for Harris County, TX:

library(tidycensus)
library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.1.1     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(sf)

## Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1; sf_use_s2() is TRUE

library(tmap)

census_api_key(key =  "ef214d63d7f3995f639dbdf005b69348d62698ba",overwrite = TRUE, install = T)

## Your original .Renviron will be backed up and stored in your R HOME directory if needed.

## Your API key has been stored in your .Renviron and can be accessed by Sys.getenv("CENSUS_API_KEY"). 
## To use now, restart R or run `readRenviron("~/.Renviron")`

## [1] "ef214d63d7f3995f639dbdf005b69348d62698ba"

readRenviron("~/.Renviron")

v11_Profile <- load_variables(2011,
                              "acs5/profile",
                              cache = TRUE) #demographic profile tables
v19_Profile <- load_variables(2019,
                              "acs5/profile",
                              cache = TRUE) #demographic 

#Search for variables by using grep()

v11_Profile[grep(x = v11_Profile$label,
                 "VACANT HOUSING UNITS",
                 ignore.case = TRUE), 
            c("name", "label")]

## # A tibble: 2 x 2
##   name       label                                            
##   <chr>      <chr>                                            
## 1 DP04_0003  Estimate!!HOUSING OCCUPANCY!!Vacant housing units
## 2 DP04_0003P Percent!!HOUSING OCCUPANCY!!Vacant housing units

v19_Profile[grep(x = v19_Profile$label,
                 "VACANT HOUSING UNITS",
                 ignore.case = TRUE),
            c("name", "label")]

## # A tibble: 2 x 2
##   name       label                                                              
##   <chr>      <chr>                                                              
## 1 DP04_0003  Estimate!!HOUSING OCCUPANCY!!Total housing units!!Vacant housing u~
## 2 DP04_0003P Percent!!HOUSING OCCUPANCY!!Total housing units!!Vacant housing un~

Extracting from ACS summary file data profile variables from 2011 and 2019 for Harris County, TX Census Tracts

The data profile tables are very useful because they contain lots of pre-calculated variables.

vac11<-get_acs(geography = "tract",
                state="TX",
                county = "Harris County",
                year = 2011,
                variables="DP04_0003P" ,
                geometry = T,
                output = "wide")

## Getting data from the 2007-2011 5-year ACS

## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.

## Using the ACS Data Profile

#rename variables and filter missing cases
vac11 <- vac11%>%
  mutate(pvac = DP04_0003PE,
         pvac_er = DP04_0003PM/1.645,
         pvac_cv =100* (pvac_er/pvac)) %>%
  filter(complete.cases(pvac), is.finite(pvac_cv)==T)%>%
  select(GEOID, pvac, pvac_er,pvac_cv)

head(vac11)

vac19<-get_acs(geography = "tract",
                state="TX",
                county = "Harris County",
                year = 2019,
                variables="DP04_0003P" ,
                geometry = T,
                output = "wide")

## Getting data from the 2015-2019 5-year ACS

## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.

## Using the ACS Data Profile

#rename variables and filter missing cases
vac19 <- vac19%>%
  mutate(pvac = DP04_0003PE,
         pvac_er = DP04_0003PM/1.645,
         pvac_cv =100* (pvac_er/pvac)) %>%
  filter(complete.cases(pvac), is.finite(pvac_cv)==T)%>%
  select(GEOID, pvac, pvac_er,pvac_cv)

head(vac19)

##Create a map using quantile breaks of this variable for each year:

tm_shape(vac11)+
  tm_polygons(c("pvac"),
              title=c("% in Vacant housing"),
              palette="Blues",
              style="quantile",
              n=5)+
  #tm_format("World", legend.outside=T, title.size =4)+
  tm_scale_bar()+
  tm_layout(title="Harris County Vacant housing units Estimates for 2011 - Quantile Breaks",
            title.size =1.5,
            legend.frame = TRUE,
            title.position = c('right', 'top'))+
  tm_compass()+
  tm_format("World",
            legend.position =  c("left", "bottom"),
            main.title.position =c("center"))

Mapping of errors in estimates

Here I generate a quantile break for the coefficient of variation in census tract vacant housing units estimates.

p1<-tm_shape(vac11)+
  tm_polygons(c("pvac"),
              title=c("% in Vacant Housing "),
              palette="Blues",
              style="quantile",
              n=5)+
  #tm_format("World", legend.outside=T, title.size =4)+
  tm_scale_bar()+
  tm_layout(title="Harris County Vacant housing Rates Estimates in 2011",
            title.size =1.5, 
            legend.frame = TRUE,
            title.position = c('right', 'top'))+
  tm_compass()+
  tm_format("World",
            legend.position =  c("left", "bottom"),
            main.title.position =c("center"))


p2<-tm_shape(vac11)+
  tm_polygons(c("pvac_cv"),
              title=c("CV in Vacant housing "),
              palette="Blues",
              style="quantile",
              n=5)+
  #tm_format("World", title="Harris County Vacant Housing Rate CV in 2011", legend.outside=T)+
  tm_layout(title="Harris County Vacant Housing Rate CV in 2011",
            title.size =1.5,
            legend.frame = TRUE,
            title.position = c('right', 'top'))+
  tm_scale_bar()+
  tm_compass()+
  tm_format("World",
            legend.position =  c("left", "bottom"),
            main.title.position =c("center"))



tmap_arrange(p1, p2)

plot(vac11$pvac, vac11$pvac_cv, main = "Error in Estimates vs Estimate Size")

tm_shape(vac19)+
  tm_polygons(c("pvac"),
              title=c("% in Vacant housing"),
              palette="Blues",
              style="quantile",
              n=5)+
  #tm_format("World", legend.outside=T, title.size =4)+
  tm_scale_bar()+
  tm_layout(title="Harris County Vacant housing units Estimates for 2019 - Quantile Breaks",
            title.size =1.5,
            legend.frame = TRUE,
            title.position = c('right', 'top'))+
  tm_compass()+
  tm_format("World",
            legend.position =  c("left", "bottom"),
            main.title.position =c("center"))

Calculate the differences between the two years, including the test for significance of the differnces:

Compare poverty rates over time Here we take the poverty rate in tracts derived from the 2007-2011 ACS and compare it to the estimate from the 2015-2019 ACS

#merge the two years worth of data
st_geometry(vac19)<-NULL #strip the geometry from the 2019 data

mdat<-left_join(vac11, vac19, by=c("GEOID"="GEOID"))

head(mdat)

## Simple feature collection with 6 features and 7 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -95.40302 ymin: 29.8129 xmax: -95.3233 ymax: 29.8869
## Geodetic CRS:  NAD83
##         GEOID pvac.x pvac_er.x pvac_cv.x pvac.y pvac_er.y pvac_cv.y
## 1 48201222100   12.5  3.647416  29.17933    9.3  3.221884  34.64392
## 2 48201221400   16.3  3.768997  23.12268   13.4  3.465046  25.85855
## 3 48201221300   11.0  4.133739  37.57944    3.2  2.188450  68.38906
## 4 48201530500   24.0  3.586626  14.94428    8.3  2.492401  30.02893
## 5 48201530400   20.5  4.680851  22.83342   10.6  2.978723  28.10116
## 6 48201530300   22.4  4.984802  22.25358   21.9  5.167173  23.59440
##                         geometry
## 1 POLYGON ((-95.34437 29.8805...
## 2 POLYGON ((-95.3931 29.84572...
## 3 POLYGON ((-95.3618 29.84628...
## 4 POLYGON ((-95.38838 29.8306...
## 5 POLYGON ((-95.38838 29.8306...
## 6 POLYGON ((-95.37563 29.8140...

The test for significance of the differences

acstest<-function(names,geoid, est1, err1, est2, err2, alpha, yr1, yr2, span){
  
  se1<-err1/qnorm(.90)
  se2<-err2/qnorm(.90)
  yrs1<-seq(yr1, to=yr1-span)
  yrs2<-seq(yr2, to=yr2-span)

  C<-mean(yrs2%in%yrs1)
  diff<- (est1-est2)
  test<-(est1-est2) / (sqrt(1-C)*sqrt(se1^2+se2^2))
  crit<-qnorm(1-alpha/2)
  pval<-1-pnorm(abs(test))
  result<-NULL
  result[pval > alpha]<-"insignificant change"
  result[pval < alpha & test < 0]<- "significant increase"
  result[pval < alpha & test > 0]<-"significant decrease" 
  
  data.frame(name=names,geoid=geoid, est1=est1, est2=est2, se1=se1, se2=se2,difference=diff, test=test, result=result, pval=pval)
}

  diff1119<-acstest(names = mdat$GEOID,
                    geoid = mdat$GEOID,
                    est1 = mdat$pvac.x,
                    est2 = mdat$pvac.y,
                    err1 = mdat$pvac_er.x,
                    err2=mdat$pvac_er.y,
                    alpha = .1,
                    yr1 = 2011, yr2=2019,
                    span = 5)

head(diff1119)

##          name       geoid est1 est2      se1      se2 difference      test
## 1 48201222100 48201222100 12.5  9.3 2.846094 2.514050        3.2 0.8426686
## 2 48201221400 48201221400 16.3 13.4 2.940964 2.703789        2.9 0.7259137
## 3 48201221300 48201221300 11.0  3.2 3.225573 1.707656        7.8 2.1371536
## 4 48201530500 48201530500 24.0  8.3 2.798659 1.944831       15.7 4.6067287
## 5 48201530400 48201530400 20.5 10.6 3.652487 2.324310        9.9 2.2867289
## 6 48201530300 48201530300 22.4 21.9 3.889662 4.031967        0.5 0.0892484
##                 result         pval
## 1 insignificant change 1.997069e-01
## 2 insignificant change 2.339458e-01
## 3 significant decrease 1.629275e-02
## 4 significant decrease 2.045265e-06
## 5 significant decrease 1.110583e-02
## 6 insignificant change 4.644422e-01

table(diff1119$result)

## 
## insignificant change significant decrease significant increase 
##                  393                  257                  101

It can be done in a very similar function significance() from the tidycensus package does a similar thing, but with less output.

significance(est1=mdat$pvac.x,
             est2=mdat$pvac.y,
             moe1=mdat$pvac_er.x,
             moe2 = mdat$pvac_er.y,
             clevel = .9)

##   [1] FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE
##  [13]  TRUE FALSE FALSE  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
##  [25]  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE FALSE
##  [37] FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE
##  [49] FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE
##  [61]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE FALSE    NA FALSE FALSE
##  [73] FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE
##  [85]  TRUE FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE  TRUE FALSE
##  [97] FALSE  TRUE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE
## [109]  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [121] FALSE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
## [133] FALSE  TRUE    NA  TRUE  TRUE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE
## [145]  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE
## [157] FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE
## [169]  TRUE  TRUE FALSE  TRUE    NA  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE
## [181]  TRUE  TRUE  TRUE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE
## [193] FALSE FALSE FALSE  TRUE FALSE  TRUE  TRUE FALSE FALSE  TRUE FALSE FALSE
## [205] FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE
## [217]  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE
## [229]  TRUE  TRUE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE  TRUE
## [241] FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE  TRUE FALSE  TRUE
## [253]  TRUE FALSE  TRUE FALSE FALSE FALSE  TRUE  TRUE FALSE  TRUE FALSE FALSE
## [265] FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
## [277]  TRUE FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE FALSE  TRUE FALSE
## [289] FALSE  TRUE FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
## [301] FALSE  TRUE FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE
## [313] FALSE  TRUE FALSE    NA  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE  TRUE
## [325] FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE  TRUE FALSE  TRUE FALSE
## [337] FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE
## [349] FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE
## [361] FALSE FALSE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE
## [373]  TRUE  TRUE  TRUE FALSE  TRUE FALSE FALSE FALSE  TRUE  TRUE FALSE  TRUE
## [385] FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE  TRUE
## [397] FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE
## [409]  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
## [421]  TRUE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE  TRUE  TRUE FALSE
## [433] FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE  TRUE  TRUE
## [445]  TRUE  TRUE  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE FALSE
## [457]  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE  TRUE  TRUE
## [469]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE
## [481]  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE
## [493]  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE FALSE
## [505]  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE
## [517]  TRUE  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE
## [529] FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE
## [541]  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE  TRUE
## [553]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE FALSE
## [565]  TRUE FALSE FALSE    NA  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE
## [577]    NA  TRUE    NA  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE
## [589]  TRUE  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE
## [601] FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE  TRUE FALSE
## [613]  TRUE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE
## [625]  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [637]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE    NA
## [649] FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE
## [661]  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE
## [673] FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE
## [685] FALSE FALSE  TRUE FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE FALSE
## [697]  TRUE  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
## [709] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE
## [721]  TRUE FALSE FALSE FALSE  TRUE  TRUE FALSE  TRUE FALSE  TRUE FALSE FALSE
## [733] FALSE FALSE  TRUE FALSE  TRUE  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE
## [745] FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE
## [757]    NA  TRUE FALSE FALSE

mdat$signif<- significance(est1=mdat$pvac.x,
                           est2=mdat$pvac.y,
                           moe1=mdat$pvac_er.x,
                           moe2 = mdat$pvac_er.y,
                           clevel = .9)

table(mdat$signif)

## 
## FALSE  TRUE 
##   393   358

##Creating a map showing the differences by census tract:

acs_merge<-left_join(mdat, diff1119, by=c("GEOID"="geoid"))

tmap_mode("plot")

## tmap mode set to plotting

p1<-tm_shape(acs_merge)+
  tm_polygons(c("pvac.x"),
              title=c("% in Vacant housing  2011"),
              palette="Blues",
              style="quantile",
              n=5)+
  #tm_format("World", legend.outside=T, title.size =4)+
  tm_scale_bar()+
  tm_layout(title="Harris County Vacant Housing Rate Estimates 2011",
            title.size =1.5,
            legend.frame = TRUE,
            title.position = c('right', 'top'))+
  tm_compass()+
  tm_format("World",
            legend.position =  c("left", "bottom"),
            main.title.position =c("center"))

p2<-tm_shape(acs_merge)+
  tm_polygons(c("pvac.y"),
              title=c("% in Vacant Housing 2019"),
              palette="Blues", 
              style="quantile",
              n=5)+
  #tm_format("World", title="Harris County Vacant Housing Rate CV", legend.outside=T)+
  tm_layout(title="Harris County Vacant Housing Rate Estimate 2019",
            title.size =1.5,
            legend.frame = TRUE,
            title.position = c('right', 'top'))+
  tm_scale_bar()+
  tm_compass()+
  tm_format("World",
            legend.position =  c("left", "bottom"),
            main.title.position =c("center"))


p3  <- tm_shape(acs_merge)+
  tm_polygons(c("result"),
              title=c("Changes"),
              palette = "Set2")+
  #tm_format("World", title="Harris County Vacant Housing Rate CV", legend.outside=T)+
  tm_layout(title="Harris County Vacant housing Rate Estimate Changes",
            title.size =1.5, 
            legend.frame = TRUE,
            title.position = c('right', 'top'))+
  tm_scale_bar()+
  tm_compass()
  

tmap_arrange(p1, p2,  p3, nrow=2)

Making an interactive map:

tmap_mode("view")

## tmap mode set to interactive viewing

#osmtile <- tmaptools::read_osm(vac11, mergeTiles = T)

#tm_shape(osmtile)+
 # tm_rgb()+
tm_shape(acs_merge)+
  tm_polygons("result",
              alpha = .7,
              title=c("Changes"),
              palette = "Set2")+
  #tm_format("World", title="Harris County Vacant Housing Rate CV", legend.outside=T)+
  tm_layout(title="Harris County Vacant Housing Rate Estimate Changes",
            title.size =1.5)+
  tm_scale_bar()

  #tm_compass()+

Homework 4

Mahmuda Sultana

2/16/2022

Extracting from ACS summary file data profile variables from 2011 and 2019 for Harris County, TX Census Tracts

Mapping of errors in estimates

Calculate the differences between the two years, including the test for significance of the differnces:

Making an interactive map: