In this homework, I’ll be creating IPUMS estimates of California PUMAS showing what percent of the population lived in the same house last year.
First, I load the data:
library(tidycensus)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.0 v dplyr 1.0.5
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(sf)
## Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1
library(ggplot2)
library(readxl)
library(censusxy)
library(mapview)
library(ipumsr)
## Warning: package 'ipumsr' was built under R version 4.0.5
library(survey)
## Warning: package 'survey' was built under R version 4.0.5
## Loading required package: grid
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(dplyr)
library(car)
## Warning: package 'car' was built under R version 4.0.5
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
library(ggplot2)
library(tigris)
## To enable
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
library(classInt)
library(tmap)
file_path<-"C:/Users/hhx360/Google Drive (naslund.utsa@gmail.com)/School Files/School/MS Applied Demography/semesters/Spring 2021/DEM 7093 - GIS/Homework/Homework 7/usa_00002.xml"
ipums<-read_ipums_ddi(file_path)
data<-read_ipums_micro(ipums)
## Use of data from IPUMS USA is subject to conditions including that users should
## cite the data appropriately. Use command `ipums_conditions()` for more details.
Here is a map of California PUMA regions:
options(tigris_class = "sf")
pumas<-pumas(state = "CA", year = 2019, cb = T)
##
|
| | 0%
|
|== | 2%
|
|== | 4%
|
|==== | 6%
|
|===== | 7%
|
|====== | 9%
|
|======= | 10%
|
|========= | 12%
|
|========= | 13%
|
|=========== | 16%
|
|============ | 17%
|
|============= | 19%
|
|============== | 20%
|
|================ | 22%
|
|================ | 23%
|
|================== | 26%
|
|=================== | 27%
|
|==================== | 29%
|
|===================== | 30%
|
|======================= | 32%
|
|======================= | 33%
|
|========================= | 35%
|
|========================== | 37%
|
|=========================== | 39%
|
|============================ | 40%
|
|============================= | 42%
|
|============================== | 43%
|
|================================ | 45%
|
|================================= | 46%
|
|================================== | 49%
|
|=================================== | 50%
|
|==================================== | 52%
|
|===================================== | 53%
|
|======================================= | 55%
|
|======================================= | 56%
|
|========================================= | 59%
|
|========================================== | 60%
|
|=========================================== | 62%
|
|============================================ | 63%
|
|============================================== | 65%
|
|============================================== | 66%
|
|================================================ | 69%
|
|================================================= | 70%
|
|================================================== | 72%
|
|=================================================== | 73%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|======================================================== | 80%
|
|========================================================= | 82%
|
|========================================================== | 83%
|
|============================================================ | 86%
|
|============================================================== | 88%
|
|=============================================================== | 89%
|
|================================================================= | 93%
|
|================================================================== | 95%
|
|=================================================================== | 96%
|
|===================================================================== | 98%
|
|======================================================================| 100%
plot(pumas["GEOID10"], main = "Public Use Microdata Areas in California")
names(data)<-tolower(names(data))
Here, I recode the migration data to a new variable called “SAME_HOUSE”, which will be “1” if the respondent has lived in the same house since last year, or “0” otherwise. I also remove missing cases:
data<-data %>% mutate(SAME_HOUSE=ifelse(migrate1==1,1,
ifelse(migrate1==2,0,
ifelse(migrate1==3,0,
ifelse(migrate1==4,0,NA_integer_)))))
data_complete<-data %>% filter(complete.cases(SAME_HOUSE))
Here, I create a survey design from the IPUMS data:
des<-svydesign(ids=~cluster,
strata=~strata,
weights=~perwt,
data=data_complete)
…and use that survey design to find the mean of SAME_HOUSE:
puma_est_samehouse<-svyby(formula=~SAME_HOUSE,
by=~puma,
design=des,
FUN=svymean,
na.rm=TRUE)
#Joining survey and geo data
Now I can join the survey data with the geo data:
pumas$puma<-as.numeric(pumas$PUMACE10)
geo1<-geo_join(pumas,puma_est_samehouse,by_sp="puma",by_df="puma")
## Warning: `group_by_()` was deprecated in dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
head(geo1)
## Simple feature collection with 6 features and 12 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -124.4096 ymin: 33.46296 xmax: -116.8412 ymax: 41.46584
## Geodetic CRS: NAD83
## STATEFP10 PUMACE10 AFFGEOID10 GEOID10
## 1 06 09702 7950000US0609702 0609702
## 2 06 02300 7950000US0602300 0602300
## 3 06 08506 7950000US0608506 0608506
## 4 06 06506 7950000US0606506 0606506
## 5 06 08900 7950000US0608900 0608900
## 6 06 06103 7950000US0606103 0606103
## NAME10
## 1 Sonoma County (South)--Petaluma, Rohnert Park & Cotati Cities
## 2 Humboldt County
## 3 Santa Clara County (East)--Gilroy, Morgan Hill & San Jose (South) Cities
## 4 Riverside County (Southwest)--Hemet City & East Hemet
## 5 Shasta County--Redding City
## 6 Placer County (East/High Country Region)--Auburn & Colfax Cities
## LSAD10 ALAND10 AWATER10 puma SAME_HOUSE se rank
## 1 P0 344472696 7555813 9702 0.8532523 0.009686046 1
## 2 P0 9241426488 1253864712 2300 0.8019943 0.009648508 1
## 3 P0 2152449674 13432167 8506 0.8776152 0.009286238 1
## 4 P0 645481741 4500965 6506 0.8328268 0.010520474 1
## 5 P0 9778407493 186302040 8900 0.8575320 0.007507578 1
## 6 P0 3094034997 246117939 6103 0.8741276 0.009498079 1
## geometry
## 1 MULTIPOLYGON (((-122.7418 3...
## 2 MULTIPOLYGON (((-124.4086 4...
## 3 MULTIPOLYGON (((-121.8558 3...
## 4 MULTIPOLYGON (((-117.1456 3...
## 5 MULTIPOLYGON (((-123.0688 4...
## 6 MULTIPOLYGON (((-121.4104 3...
Finally, here is a map of California, showing the estimate of the population who remained in the same house during the past year. Thank you!
tmap_mode("plot")
## tmap mode set to plotting
tm_basemap("OpenStreetMap.Mapnik")+
tm_shape(geo1)+
tm_polygons("SAME_HOUSE",
style="kmeans",
n=8,
legend.hist=TRUE)+
tm_layout(legend.outside=TRUE,
title="Residents Who Remained in the Same House For The Past Year")
## [1] "#FFFDDB" "#FEF1AF" "#FED97B" "#FEB441" "#F68820" "#DB5D0A" "#AF3E03"
## [8] "#7A2A05"