Homework 7 - ipums estimates

In this homework, I’ll be creating IPUMS estimates of California PUMAS showing what percent of the population lived in the same house last year.

First, I load the data:

library(tidycensus)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.1.0     v dplyr   1.0.5
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(sf)
## Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1
library(ggplot2)
library(readxl)
library(censusxy)
library(mapview)
library(ipumsr)
## Warning: package 'ipumsr' was built under R version 4.0.5
library(survey)
## Warning: package 'survey' was built under R version 4.0.5
## Loading required package: grid
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(dplyr)
library(car)
## Warning: package 'car' was built under R version 4.0.5
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
library(ggplot2)
library(tigris)
## To enable 
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
library(classInt)
library(tmap)

file_path<-"C:/Users/hhx360/Google Drive (naslund.utsa@gmail.com)/School Files/School/MS Applied Demography/semesters/Spring 2021/DEM 7093 - GIS/Homework/Homework 7/usa_00002.xml"

ipums<-read_ipums_ddi(file_path)
data<-read_ipums_micro(ipums)
## Use of data from IPUMS USA is subject to conditions including that users should
## cite the data appropriately. Use command `ipums_conditions()` for more details.

Here is a map of California PUMA regions:

options(tigris_class = "sf")
pumas<-pumas(state = "CA", year = 2019, cb = T)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   4%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=======================                                               |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |=================================                                     |  46%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |==============================================                        |  65%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |===============================================================       |  89%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |==================================================================    |  95%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |===================================================================== |  98%
  |                                                                            
  |======================================================================| 100%
plot(pumas["GEOID10"], main = "Public Use Microdata Areas in California")

names(data)<-tolower(names(data))

Recoding

Here, I recode the migration data to a new variable called “SAME_HOUSE”, which will be “1” if the respondent has lived in the same house since last year, or “0” otherwise. I also remove missing cases:

data<-data %>% mutate(SAME_HOUSE=ifelse(migrate1==1,1,
                                        ifelse(migrate1==2,0,
                                               ifelse(migrate1==3,0,
                                                      ifelse(migrate1==4,0,NA_integer_)))))

data_complete<-data %>% filter(complete.cases(SAME_HOUSE))

Survey design

Here, I create a survey design from the IPUMS data:

des<-svydesign(ids=~cluster,
               strata=~strata,
               weights=~perwt,
               data=data_complete)

…and use that survey design to find the mean of SAME_HOUSE:

puma_est_samehouse<-svyby(formula=~SAME_HOUSE,
                          by=~puma,
                          design=des,
                          FUN=svymean,
                          na.rm=TRUE)

#Joining survey and geo data

Now I can join the survey data with the geo data:

pumas$puma<-as.numeric(pumas$PUMACE10)

geo1<-geo_join(pumas,puma_est_samehouse,by_sp="puma",by_df="puma")
## Warning: `group_by_()` was deprecated in dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
head(geo1)
## Simple feature collection with 6 features and 12 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -124.4096 ymin: 33.46296 xmax: -116.8412 ymax: 41.46584
## Geodetic CRS:  NAD83
##   STATEFP10 PUMACE10       AFFGEOID10 GEOID10
## 1        06    09702 7950000US0609702 0609702
## 2        06    02300 7950000US0602300 0602300
## 3        06    08506 7950000US0608506 0608506
## 4        06    06506 7950000US0606506 0606506
## 5        06    08900 7950000US0608900 0608900
## 6        06    06103 7950000US0606103 0606103
##                                                                     NAME10
## 1            Sonoma County (South)--Petaluma, Rohnert Park & Cotati Cities
## 2                                                          Humboldt County
## 3 Santa Clara County (East)--Gilroy, Morgan Hill & San Jose (South) Cities
## 4                    Riverside County (Southwest)--Hemet City & East Hemet
## 5                                              Shasta County--Redding City
## 6         Placer County (East/High Country Region)--Auburn & Colfax Cities
##   LSAD10    ALAND10   AWATER10 puma SAME_HOUSE          se rank
## 1     P0  344472696    7555813 9702  0.8532523 0.009686046    1
## 2     P0 9241426488 1253864712 2300  0.8019943 0.009648508    1
## 3     P0 2152449674   13432167 8506  0.8776152 0.009286238    1
## 4     P0  645481741    4500965 6506  0.8328268 0.010520474    1
## 5     P0 9778407493  186302040 8900  0.8575320 0.007507578    1
## 6     P0 3094034997  246117939 6103  0.8741276 0.009498079    1
##                         geometry
## 1 MULTIPOLYGON (((-122.7418 3...
## 2 MULTIPOLYGON (((-124.4086 4...
## 3 MULTIPOLYGON (((-121.8558 3...
## 4 MULTIPOLYGON (((-117.1456 3...
## 5 MULTIPOLYGON (((-123.0688 4...
## 6 MULTIPOLYGON (((-121.4104 3...

Map of Residents Who Remained in the Same House for the Past Year

Finally, here is a map of California, showing the estimate of the population who remained in the same house during the past year. Thank you!

tmap_mode("plot")
## tmap mode set to plotting
tm_basemap("OpenStreetMap.Mapnik")+
  tm_shape(geo1)+
  tm_polygons("SAME_HOUSE",
              style="kmeans",
              n=8,
              legend.hist=TRUE)+
  tm_layout(legend.outside=TRUE,
            title="Residents Who Remained in the Same House For The Past Year")

## [1] "#FFFDDB" "#FEF1AF" "#FED97B" "#FEB441" "#F68820" "#DB5D0A" "#AF3E03"
## [8] "#7A2A05"