Perform an extract of the 2019 5 Year ACS data.
Select the PUMA variable from the Household > Geographic variables.
Select the MIGRATE1 variable from the Person > Migration variables.
Select cases that only include the state of California
Using these data, create estimates for Californian PUMAs of the % of the population that lived in the same house last year
Produce a map of these estimates.
Publish the map to Rpubs and submit the link to the site.
library(ipumsr)
# Load the IPUMS extract: parse the DDI (XML) file first, then read the
# microdata it describes.
ddi <- read_ipums_ddi(
  ddi_file = "~/Desktop/UTSA/4_Spring21/GIS/gis_class/Homework7/usa_00015.xml"
)
data <- read_ipums_micro(ddi = ddi)
## Use of data from IPUMS USA is subject to conditions including that users should
## cite the data appropriately. Use command `ipums_conditions()` for more details.
# Strip the value labels; necessary to avoid problems with "labeled" data.
data <- haven::zap_labels(x = data)
library(survey)
library(dplyr)
library(car)
library(ggplot2)
library(tigris)
library(classInt)
library(tmap)
library(mapview)
# Ask tigris to return simple-features (sf) objects.
options(tigris_class = "sf")

# Download the 2019 cartographic-boundary PUMA polygons for California.
# `TRUE` is spelled out because the shorthand `T` is an ordinary variable that
# can be reassigned. The result reuses the name `pumas`; R still resolves
# `pumas(...)` to the tigris function because it looks up a function in call
# position.
pumas <- pumas(state = "CA", year = 2019, cb = TRUE)
##
|
| | 0%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|=========== | 16%
|
|================ | 22%
|
|===================== | 29%
|
|===================== | 31%
|
|======================= | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================== | 43%
|
|=============================== | 45%
|
|================================= | 47%
|
|================================== | 49%
|
|==================================== | 51%
|
|===================================== | 53%
|
|======================================= | 55%
|
|======================================== | 57%
|
|========================================== | 59%
|
|========================================== | 60%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|================================================ | 69%
|
|================================================== | 71%
|
|=================================================== | 73%
|
|===================================================== | 76%
|
|====================================================== | 78%
|
|======================================================== | 80%
|
|========================================================== | 82%
|
|=========================================================== | 84%
|
|============================================================= | 87%
|
|============================================================== | 89%
|
|=================================================================== | 95%
|
|======================================================================| 100%
# Quick look at the downloaded PUMA boundaries, drawn from the GEOID10 column.
plot(
  x = pumas["GEOID10"],
  main = "Public Use Microdata Areas in California"
)
Goal: % of the population that lived in the same house last year
MIGRATE1 codes: 0 = N/A; 1 = Same house; 2 = Moved within state; 3 = Moved between states; 4 = Abroad one year ago; 9 = Unknown.
# Weight variables: these have implied decimal places, so we divide by 100,
# following the codebook.
# NOTE(review): ipumsr may already apply implied decimals when reading through
# the DDI -- confirm the weights are not being scaled twice.
data[["PWT"]] <- data[["PERWT"]] / 100
data[["HWT"]] <- data[["HHWT"]] / 100

# Migration: collapse MIGRATE1 into "Same House" (code 1) versus "Moved"
# (codes 2 through 4); codes 0 and 9 are set to NA.
data[["c_migrate"]] <- Recode(
  data[["MIGRATE1"]],
  recodes = "9=NA; 0=NA; 1='Same House'; 2:4='Moved'"
)
Here we identify the person weights and the survey design variables.
# Survey design object: CLUSTER supplies the cluster ids, STRATA the strata,
# and PWT (the rescaled person weight) the sampling weights.
des <- svydesign(
  ids = ~CLUSTER,
  strata = ~STRATA,
  weights = ~PWT,
  data = data
)
The svyby() function allows us to calculate estimates for different sub-domains within the data. A sub-domain could be a demographic characteristic, but here we'll use our geographic level (PUMA).
# Weighted cross-tabulation of the "Same House" indicator by PUMA.
test <- svytable(~ I(c_migrate == "Same House") + PUMA, design = des)

# Survey-weighted mean of each c_migrate category within every PUMA, i.e. the
# estimated share who moved and the share who stayed in the same house.
puma_samehouse <- svyby(
  formula = ~c_migrate,
  by = ~PUMA,
  design = des,
  FUN = svymean,
  na.rm = TRUE
)
# Preview the first few PUMA-level estimates and their standard errors.
head(x = puma_samehouse)
## PUMA c_migrateMoved c_migrateSame House se.c_migrateMoved
## 101 101 0.25576024 0.7442398 0.009366589
## 102 102 0.17404355 0.8259565 0.006778309
## 103 103 0.12507773 0.8749223 0.006679989
## 104 104 0.10950199 0.8904980 0.008340314
## 105 105 0.11113104 0.8888690 0.006785526
## 106 106 0.07729941 0.9227006 0.006058942
## se.c_migrateSame House
## 101 0.009366589
## 102 0.006778309
## 103 0.006679989
## 104 0.008340314
## 105 0.006785526
## 106 0.006058942
# Build a numeric join key from the PUMACE10 code so it can be matched against
# the PUMA column of the survey estimates.
pumas$puma <- as.numeric(pumas$PUMACE10)

# Attach the PUMA-level estimates to the PUMA polygons.
geo1 <- geo_join(
  spatial_data = pumas,
  data_frame = puma_samehouse,
  by_sp = "puma",
  by_df = "PUMA"
)
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# Preview the joined polygons together with their estimates.
head(x = geo1)
## Simple feature collection with 6 features and 14 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -124.4096 ymin: 33.46296 xmax: -116.8412 ymax: 41.46584
## geographic CRS: NAD83
## STATEFP10 PUMACE10 AFFGEOID10 GEOID10
## 1 06 09702 7950000US0609702 0609702
## 2 06 02300 7950000US0602300 0602300
## 3 06 08506 7950000US0608506 0608506
## 4 06 06506 7950000US0606506 0606506
## 5 06 08900 7950000US0608900 0608900
## 6 06 06103 7950000US0606103 0606103
## NAME10
## 1 Sonoma County (South)--Petaluma, Rohnert Park & Cotati Cities
## 2 Humboldt County
## 3 Santa Clara County (East)--Gilroy, Morgan Hill & San Jose (South) Cities
## 4 Riverside County (Southwest)--Hemet City & East Hemet
## 5 Shasta County--Redding City
## 6 Placer County (East/High Country Region)--Auburn & Colfax Cities
## LSAD10 ALAND10 AWATER10 puma c_migrateMoved c_migrateSame House
## 1 P0 344472696 7555813 9702 0.1467477 0.8532523
## 2 P0 9241426488 1253864712 2300 0.1980057 0.8019943
## 3 P0 2152449674 13432167 8506 0.1223848 0.8776152
## 4 P0 645481741 4500965 6506 0.1671732 0.8328268
## 5 P0 9778407493 186302040 8900 0.1424680 0.8575320
## 6 P0 3094034997 246117939 6103 0.1258724 0.8741276
## se.c_migrateMoved se.c_migrateSame House rank geometry
## 1 0.009686046 0.009686046 1 MULTIPOLYGON (((-122.7418 3...
## 2 0.009648508 0.009648508 1 MULTIPOLYGON (((-124.4086 4...
## 3 0.009286238 0.009286238 1 MULTIPOLYGON (((-121.8558 3...
## 4 0.010520474 0.010520474 1 MULTIPOLYGON (((-117.1456 3...
## 5 0.007507578 0.007507578 1 MULTIPOLYGON (((-123.0688 4...
## 6 0.009498078 0.009498078 1 MULTIPOLYGON (((-121.4104 3...
# Switch tmap to interactive viewing.
tmap_mode(mode = "view")
## tmap mode set to interactive viewing
# The survey estimate is a proportion; convert it to a percentage so the
# mapped values agree with the "Percent" wording in the map title.
geo1$pct_same_house <- 100 * geo1[["c_migrateSame House"]]

# Choropleth of the share of residents living in the same house one year ago,
# classed into 8 k-means groups and drawn over an OpenStreetMap basemap.
tm_basemap("OpenStreetMap.Mapnik") +
  tm_shape(geo1) +
  tm_polygons(
    "pct_same_house",
    title = "% in same house",
    style = "kmeans",
    n = 8,
    legend.hist = TRUE
  ) +
  tm_layout(
    legend.outside = TRUE,
    title = "Percent of Residents living in Same House 1 Year Ago for CA PUMAs \n 2015-2019"
  )
## Linking to GEOS 3.8.1, GDAL 3.1.4, PROJ 6.3.1