library(ipumsr)
cat <- read_ipums_ddi("C:/Users/maman/OneDrive/DEM Spring 2021/DEM 7093 GIS/GIS Data/usa_00006.xml")
data <- read_ipums_micro(cat)
## Use of data from IPUMS USA is subject to conditions including that users should
## cite the data appropriately. Use command `ipums_conditions()` for more details.
data<-haven::zap_labels(data) #necessary to avoid problems with "labeled" data
Additional Packages
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(ggplot2)
library(tigris)
## To enable
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
library(classInt)
library(tmap)
options(tigris_class = "sf")
pumas<-pumas(state = "CA", year = 2019, cb = T)
##
|
| | 0%
|
|= | 2%
|
|===== | 8%
|
|======== | 11%
|
|========== | 14%
|
|============ | 18%
|
|=============== | 21%
|
|================ | 23%
|
|================= | 24%
|
|=================== | 27%
|
|===================== | 30%
|
|====================== | 31%
|
|======================== | 34%
|
|========================== | 37%
|
|============================ | 41%
|
|============================== | 43%
|
|=============================== | 44%
|
|================================= | 47%
|
|=================================== | 51%
|
|===================================== | 53%
|
|====================================== | 54%
|
|======================================== | 57%
|
|========================================== | 61%
|
|============================================ | 63%
|
|============================================= | 64%
|
|=============================================== | 67%
|
|================================================= | 70%
|
|==================================================== | 74%
|
|====================================================== | 77%
|
|======================================================== | 80%
|
|=========================================================== | 84%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|=============================================================== | 90%
|
|================================================================== | 94%
|
|==================================================================== | 97%
|
|===================================================================== | 99%
|
|======================================================================| 100%
plot(pumas["GEOID10"], main = "Public Use Microdata Areas in California")
names(data)<-tolower(names(data)) # Change variable names to lower case
#weight variables, these have implied decimal places so we have to divde by 100, following the codebook
data$pwt <- data$perwt/100
data$hwt <- data$hhwt/100
#race/ethnicity
data$hisp <- Recode(data$hispan, recodes = "9=NA; 1:4='Hispanic'; 0='NonHispanic'")
data$race_rec <- Recode(data$race, recodes = "1='White'; 2='Black'; 3='Other'; 4:6='Asian'; 7:9='Other'")
data$race_eth <- interaction(data$hisp, data$race_rec, sep = "_")
data$race_eth <- as.factor(ifelse(substr(as.character(data$race_eth),1,8) == "Hispanic", "Hispanic", as.character(data$race_eth)))
data$race_eth <- relevel(data$race_eth, ref = "NonHispanic_White")
#sex
data$male <- ifelse(data$sex == 1,1,0)
#age in 10 year intervals
data$agecat<-cut(data$age, breaks = c(0, 18, 20, 30, 40, 50, 65, 120), include.lowest = T)
#education
data$educ_level<- Recode(data$educd, recodes = "2:61='0LT_HS';62:64='1_HSD/GED';65:80='2_somecoll';90:100='2_somecoll'; 81:83='3_AssocDegree';101='4_bachelordegree'; 110:116='4_BAplus_GradDegree'; else=NA")
# Migration
data$migr8 <- Recode(data$migrate1, recodes = "1=0; 2:4=1; else=NA")
# Poverty
data$pov <- ifelse(data$poverty <= 100, "at or below",
ifelse(data$poverty >100, "above", NA))
data$pov2 <- Recode(data$pov, recodes = "'at or below'=1; 'above'=0; else=NA")
dog<-svydesign(ids = ~cluster,
strata = ~ strata,
weights = ~pwt,
data = data)
puma_est_edu<-svyby(formula = ~educ_level,
by = ~puma,
design = subset(dog, age>25),
FUN=svymean,
na.rm = TRUE )
puma_est_migr8<-svyby(formula = ~migr8,
by = ~puma,
design = dog,
FUN=svymean,
na.rm = TRUE )
head(puma_est_edu)
## puma educ_level0LT_HS educ_level1_HSD/GED educ_level2_somecoll
## 101 101 0.03771076 0.06569605 0.1166131
## 102 102 0.16054167 0.15357159 0.1579987
## 103 103 0.05152757 0.08422222 0.1280499
## 104 104 0.31012651 0.24563983 0.2083160
## 105 105 0.13347136 0.18646181 0.1905626
## 106 106 0.14333595 0.23360220 0.2146757
## educ_level3_AssocDegree educ_level4_bachelordegree
## 101 0.04322687 0.3299605
## 102 0.06094609 0.2850304
## 103 0.05430587 0.3451345
## 104 0.05510831 0.1160381
## 105 0.07115222 0.2688053
## 106 0.07644186 0.2248899
## educ_level4_BAplus_GradDegree se.educ_level0LT_HS se.educ_level1_HSD/GED
## 101 0.40679279 0.003905639 0.004448080
## 102 0.18191149 0.006521332 0.006326229
## 103 0.33675991 0.004472209 0.005309984
## 104 0.06477123 0.011587978 0.009193415
## 105 0.14954676 0.006885012 0.007559093
## 106 0.10705441 0.007508320 0.008802248
## se.educ_level2_somecoll se.educ_level3_AssocDegree
## 101 0.005691604 0.003405890
## 102 0.005625829 0.003542165
## 103 0.005791771 0.004021030
## 104 0.008470772 0.004609917
## 105 0.006335683 0.004081108
## 106 0.007564748 0.004487001
## se.educ_level4_bachelordegree se.educ_level4_BAplus_GradDegree
## 101 0.008030658 0.008809076
## 102 0.007069620 0.005803563
## 103 0.008150823 0.008192403
## 104 0.006230842 0.004944989
## 105 0.007454848 0.005514219
## 106 0.008090311 0.005358965
head(puma_est_migr8)
## puma migr8 se
## 101 101 0.25576024 0.009366589
## 102 102 0.17404355 0.006778309
## 103 103 0.12507773 0.006679989
## 104 104 0.10950199 0.008340314
## 105 105 0.11113104 0.006785526
## 106 106 0.07729941 0.006058942
pumas$puma<-as.numeric(pumas$PUMACE10)
geo1<-geo_join(pumas, puma_est_migr8, by_sp="puma",by_df= "puma")
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
head(geo1)
## Simple feature collection with 6 features and 12 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -124.4096 ymin: 33.46296 xmax: -116.8412 ymax: 41.46584
## geographic CRS: NAD83
## STATEFP10 PUMACE10 AFFGEOID10 GEOID10
## 1 06 09702 7950000US0609702 0609702
## 2 06 02300 7950000US0602300 0602300
## 3 06 08506 7950000US0608506 0608506
## 4 06 06506 7950000US0606506 0606506
## 5 06 08900 7950000US0608900 0608900
## 6 06 06103 7950000US0606103 0606103
## NAME10
## 1 Sonoma County (South)--Petaluma, Rohnert Park & Cotati Cities
## 2 Humboldt County
## 3 Santa Clara County (East)--Gilroy, Morgan Hill & San Jose (South) Cities
## 4 Riverside County (Southwest)--Hemet City & East Hemet
## 5 Shasta County--Redding City
## 6 Placer County (East/High Country Region)--Auburn & Colfax Cities
## LSAD10 ALAND10 AWATER10 puma migr8 se rank
## 1 P0 344472696 7555813 9702 0.1467477 0.009686046 1
## 2 P0 9241426488 1253864712 2300 0.1980057 0.009648508 1
## 3 P0 2152449674 13432167 8506 0.1223848 0.009286238 1
## 4 P0 645481741 4500965 6506 0.1671732 0.010520474 1
## 5 P0 9778407493 186302040 8900 0.1424680 0.007507578 1
## 6 P0 3094034997 246117939 6103 0.1258724 0.009498078 1
## geometry
## 1 MULTIPOLYGON (((-122.7418 3...
## 2 MULTIPOLYGON (((-124.4086 4...
## 3 MULTIPOLYGON (((-121.8558 3...
## 4 MULTIPOLYGON (((-117.1456 3...
## 5 MULTIPOLYGON (((-123.0688 4...
## 6 MULTIPOLYGON (((-121.4104 3...
tmap_mode("plot")
## tmap mode set to plotting
tm_basemap("OpenStreetMap.Mapnik")+
tm_shape(geo1)+
tm_polygons("migr8",
style="kmeans",
palette="PuBu",
n=8,
legend.hist = TRUE) +
tm_layout(legend.outside = TRUE,
title = "Migration rate in California PUMAs \n 2015-2019")
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1