library(ipumsr)
## Warning: package 'ipumsr' was built under R version 4.0.5
ddi <- read_ipums_ddi("C:/Users/canda/Documents/DEM7093 gis_class/usa_00019.xml")
data <- read_ipums_micro(ddi)
## Use of data from IPUMS USA is subject to conditions including that users should
## cite the data appropriately. Use command `ipums_conditions()` for more details.
data<-haven::zap_labels(data) #necessary to avoid problems with "labeled" data
library(survey)
library(dplyr)
library(car)
library(ggplot2)
library(tigris)
library(classInt)
library(tmap)
options(tigris_class = "sf")
pumas<-pumas(state = "CA", year = 2019, cb = T)
plot(pumas["GEOID10"], main = "Public Use Microdata Areas in California")
names(data)<-tolower(names(data))
Here I recode several demographic variables
#weight variables, these have implied decimal places so we have to divde by 100, following the codebook
data$pwt <- data$perwt/100
data$hwt <- data$hhwt/100
#race/ethnicity
data$hisp <- Recode(data$hispan, recodes = "9=NA; 1:4='Hispanic'; 0='NonHispanic'")
data$race_rec <- Recode(data$race, recodes = "1='White'; 2='Black'; 3='Other'; 4:6='Asian'; 7:9='Other'")
data$race_eth <- interaction(data$hisp, data$race_rec, sep = "_")
data$race_eth <- as.factor(ifelse(substr(as.character(data$race_eth),1,8) == "Hispanic", "Hispanic", as.character(data$race_eth)))
data$race_eth <- relevel(data$race_eth, ref = "NonHispanic_White")
#sex
data$male <- ifelse(data$sex == 1,1,0)
#migration
data$migrate<- Recode(data$migrate1, recodes = "1=1; 2:9=0; else=NA")
#employed
data$employed <- Recode(data$empstat, recodes = "1=1; 2:3=0; else=NA")
#maritalstatus
data$marital<-Recode(data$marst, recodes = "1:2=1; 3:6=2; else=NA")
#age in 10 year intervals
data$agecat<-cut(data$age, breaks = c(0, 18, 20, 30, 40, 50, 65, 120), include.lowest = T)
Here we identify the person weights and the survey design variables.
des<-svydesign(ids = ~cluster,
strata = ~ strata,
weights = ~pwt,
data = data)
The svyby() function allows us calculate estimates for different sub-domains within the data, this could be a demographic characteristic, but we’ll use our geographic level.
puma_est_samehouse<-svyby(formula = ~migrate,
by = ~puma,
design = subset(des, age%in%18:120),
FUN=svymean,
na.rm = TRUE )
puma_est_marital<-svyby(formula = ~marital,
by = ~puma,
design=subset(des, migrate==1),
FUN=svymean,
na.rm = TRUE )
puma_est_age<-svyby(formula = ~agecat,
by = ~puma,
design = subset(des, migrate==1),
FUN = svymean,
na.rm = TRUE )
head(puma_est_samehouse)
## puma migrate se
## 101 101 0.7251632 0.009861916
## 102 102 0.8143756 0.006928236
## 103 103 0.8691502 0.006591523
## 104 104 0.8920334 0.007478713
## 105 105 0.8860817 0.006765669
## 106 106 0.9215227 0.005682131
head(puma_est_marital)
## puma marital se
## 101 101 1.632448 0.008319243
## 102 102 1.703854 0.007515933
## 103 103 1.552394 0.008134880
## 104 104 1.696991 0.008505672
## 105 105 1.587970 0.007587832
## 106 106 1.593637 0.008371951
head(puma_est_age)
## puma agecat[0,18] agecat(18,20] agecat(20,30] agecat(30,40] agecat(40,50]
## 101 101 0.1725258 0.03542281 0.17715238 0.1352446 0.1302636
## 102 102 0.1600313 0.01496574 0.17676472 0.2062596 0.1384031
## 103 103 0.1852725 0.01146686 0.08474033 0.1411992 0.1636129
## 104 104 0.2740301 0.02835397 0.15337791 0.1587323 0.1316024
## 105 105 0.2014527 0.01793048 0.11963631 0.1436130 0.1445112
## 106 106 0.2214664 0.01626269 0.12485230 0.1532950 0.1345402
## agecat(50,65] agecat(65,120] se.agecat[0,18] se.agecat(18,20]
## 101 0.1811851 0.16820568 0.006222725 0.003886460
## 102 0.1723630 0.13121254 0.006281255 0.001646195
## 103 0.2278550 0.18585321 0.006311993 0.001604888
## 104 0.1573250 0.09657837 0.008284810 0.002779268
## 105 0.2234704 0.14938594 0.006199349 0.001965251
## 106 0.2136722 0.13591123 0.007097539 0.001966783
## se.agecat(20,30] se.agecat(30,40] se.agecat(40,50] se.agecat(50,65]
## 101 0.008231637 0.005983659 0.005214156 0.006571189
## 102 0.006498034 0.006305241 0.004855648 0.005809187
## 103 0.005095964 0.005915746 0.005989422 0.007131297
## 104 0.007102748 0.006279054 0.005745837 0.006443300
## 105 0.005782855 0.005625599 0.005154400 0.006608431
## 106 0.006123443 0.006287610 0.005611794 0.007049252
## se.agecat(65,120]
## 101 0.006296592
## 102 0.005185209
## 103 0.006736860
## 104 0.005005448
## 105 0.005610579
## 106 0.005681810
pumas$puma<-as.numeric(pumas$PUMACE10)
geo1<-geo_join(pumas, puma_est_samehouse, by_sp="puma",by_df= "puma")
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
head(geo1)
## Simple feature collection with 6 features and 12 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -124.4096 ymin: 33.46296 xmax: -116.8412 ymax: 41.46584
## geographic CRS: NAD83
## STATEFP10 PUMACE10 AFFGEOID10 GEOID10
## 1 06 09702 7950000US0609702 0609702
## 2 06 02300 7950000US0602300 0602300
## 3 06 08506 7950000US0608506 0608506
## 4 06 06506 7950000US0606506 0606506
## 5 06 08900 7950000US0608900 0608900
## 6 06 06103 7950000US0606103 0606103
## NAME10
## 1 Sonoma County (South)--Petaluma, Rohnert Park & Cotati Cities
## 2 Humboldt County
## 3 Santa Clara County (East)--Gilroy, Morgan Hill & San Jose (South) Cities
## 4 Riverside County (Southwest)--Hemet City & East Hemet
## 5 Shasta County--Redding City
## 6 Placer County (East/High Country Region)--Auburn & Colfax Cities
## LSAD10 ALAND10 AWATER10 puma migrate se rank
## 1 P0 344472696 7555813 9702 0.8469285 0.009115654 1
## 2 P0 9241426488 1253864712 2300 0.7974411 0.009421146 1
## 3 P0 2152449674 13432167 8506 0.8787603 0.008735708 1
## 4 P0 645481741 4500965 6506 0.8371582 0.009382898 1
## 5 P0 9778407493 186302040 8900 0.8660663 0.006837850 1
## 6 P0 3094034997 246117939 6103 0.8821970 0.008108159 1
## geometry
## 1 MULTIPOLYGON (((-122.7418 3...
## 2 MULTIPOLYGON (((-124.4086 4...
## 3 MULTIPOLYGON (((-121.8558 3...
## 4 MULTIPOLYGON (((-117.1456 3...
## 5 MULTIPOLYGON (((-123.0688 4...
## 6 MULTIPOLYGON (((-121.4104 3...
geo2<-geo_join(pumas, puma_est_marital, by_sp="puma",by_df= "puma")
head(geo2)
## Simple feature collection with 6 features and 12 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -124.4096 ymin: 33.46296 xmax: -116.8412 ymax: 41.46584
## geographic CRS: NAD83
## STATEFP10 PUMACE10 AFFGEOID10 GEOID10
## 1 06 09702 7950000US0609702 0609702
## 2 06 02300 7950000US0602300 0602300
## 3 06 08506 7950000US0608506 0608506
## 4 06 06506 7950000US0606506 0606506
## 5 06 08900 7950000US0608900 0608900
## 6 06 06103 7950000US0606103 0606103
## NAME10
## 1 Sonoma County (South)--Petaluma, Rohnert Park & Cotati Cities
## 2 Humboldt County
## 3 Santa Clara County (East)--Gilroy, Morgan Hill & San Jose (South) Cities
## 4 Riverside County (Southwest)--Hemet City & East Hemet
## 5 Shasta County--Redding City
## 6 Placer County (East/High Country Region)--Auburn & Colfax Cities
## LSAD10 ALAND10 AWATER10 puma marital se rank
## 1 P0 344472696 7555813 9702 1.576687 0.009267779 1
## 2 P0 9241426488 1253864712 2300 1.620597 0.009934291 1
## 3 P0 2152449674 13432167 8506 1.568378 0.008900062 1
## 4 P0 645481741 4500965 6506 1.621994 0.008443271 1
## 5 P0 9778407493 186302040 8900 1.556218 0.007571657 1
## 6 P0 3094034997 246117939 6103 1.482881 0.009546415 1
## geometry
## 1 MULTIPOLYGON (((-122.7418 3...
## 2 MULTIPOLYGON (((-124.4086 4...
## 3 MULTIPOLYGON (((-121.8558 3...
## 4 MULTIPOLYGON (((-117.1456 3...
## 5 MULTIPOLYGON (((-123.0688 4...
## 6 MULTIPOLYGON (((-121.4104 3...
geo3<-geo_join(pumas, puma_est_age, by_sp="puma", by_df= "puma")
head(geo3)
## Simple feature collection with 6 features and 24 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -124.4096 ymin: 33.46296 xmax: -116.8412 ymax: 41.46584
## geographic CRS: NAD83
## STATEFP10 PUMACE10 AFFGEOID10 GEOID10
## 1 06 09702 7950000US0609702 0609702
## 2 06 02300 7950000US0602300 0602300
## 3 06 08506 7950000US0608506 0608506
## 4 06 06506 7950000US0606506 0606506
## 5 06 08900 7950000US0608900 0608900
## 6 06 06103 7950000US0606103 0606103
## NAME10
## 1 Sonoma County (South)--Petaluma, Rohnert Park & Cotati Cities
## 2 Humboldt County
## 3 Santa Clara County (East)--Gilroy, Morgan Hill & San Jose (South) Cities
## 4 Riverside County (Southwest)--Hemet City & East Hemet
## 5 Shasta County--Redding City
## 6 Placer County (East/High Country Region)--Auburn & Colfax Cities
## LSAD10 ALAND10 AWATER10 puma agecat[0,18] agecat(18,20] agecat(20,30]
## 1 P0 344472696 7555813 9702 0.2133562 0.02668606 0.11205122
## 2 P0 9241426488 1253864712 2300 0.1990870 0.02432914 0.11406488
## 3 P0 2152449674 13432167 8506 0.2656354 0.01999496 0.10660869
## 4 P0 645481741 4500965 6506 0.2786463 0.02874110 0.10612814
## 5 P0 9778407493 186302040 8900 0.2098500 0.01657230 0.10726616
## 6 P0 3094034997 246117939 6103 0.1863977 0.01659084 0.07666723
## agecat(30,40] agecat(40,50] agecat(50,65] agecat(65,120] se.agecat[0,18]
## 1 0.12777390 0.1320894 0.2209579 0.1670853 0.008127629
## 2 0.13029358 0.1218869 0.2221820 0.1881565 0.009372130
## 3 0.12728477 0.1449845 0.2017410 0.1337507 0.008254944
## 4 0.12635495 0.1139341 0.1805173 0.1656781 0.008649513
## 5 0.11569041 0.1083184 0.2331367 0.2091661 0.006736311
## 6 0.09068209 0.1246731 0.2745334 0.2304557 0.009687839
## se.agecat(18,20] se.agecat(20,30] se.agecat(30,40] se.agecat(40,50]
## 1 0.003320595 0.006293752 0.006383677 0.006183066
## 2 0.002994499 0.008404586 0.006774885 0.006578975
## 3 0.002343505 0.006468280 0.006296938 0.006435476
## 4 0.003032720 0.005798519 0.006030991 0.005585108
## 5 0.001869133 0.005258244 0.005161068 0.004928292
## 6 0.002282940 0.006386898 0.005670959 0.006032153
## se.agecat(50,65] se.agecat(65,120] rank geometry
## 1 0.008017311 0.007063306 1 MULTIPOLYGON (((-122.7418 3...
## 2 0.008988799 0.008129300 1 MULTIPOLYGON (((-124.4086 4...
## 3 0.007825928 0.006837918 1 MULTIPOLYGON (((-121.8558 3...
## 4 0.007091173 0.007018028 1 MULTIPOLYGON (((-117.1456 3...
## 5 0.006763043 0.006433814 1 MULTIPOLYGON (((-123.0688 4...
## 6 0.009215991 0.009128643 1 MULTIPOLYGON (((-121.4104 3...
tmap_mode("view")
## tmap mode set to interactive viewing
tm_basemap("OpenStreetMap.Mapnik")+
tm_shape(geo1)+
tm_polygons("migrate",
style="kmeans",
n=8,
legend.hist = TRUE) +
tm_layout(legend.outside = TRUE,
title = "Percent of population who live in the same house as last year, 2019")
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
tmap_mode("view")
## tmap mode set to interactive viewing
tm_basemap("OpenStreetMap.Mapnik")+
tm_shape(geo2)+
tm_polygons("marital",
style="kmeans",
n=8,
legend.hist = TRUE) +
tm_layout(legend.outside = TRUE,
title = "Percent of population who live in the same house as last year who are married, 2019")
Here we use core based statistical areas instead of PUMAs
mets<-core_based_statistical_areas(cb = T, year = 2019)
mets<-mets[grep(mets$NAME,pattern = "CA"),]
plot(mets["NAME"])
sts<-states(cb=T, year=2019)
sts<-sts%>%
filter(GEOID==06)
met_est_samehouse<-svyby(formula = ~migrate,
by = ~met2013,
design=subset(des, age%in%18:120),
FUN=svymean,
na.rm=T )
met_est_marital<-svyby(formula = ~marital,
by = ~met2013,
design=subset(des, migrate==1),
FUN=svymean,
na.rm=T )
met_est_age<-svyby(formula = ~agecat,
by = ~met2013,
design=subset(des, migrate==1),
FUN=svymean,
na.rm=T )
head(met_est_samehouse)
## met2013 migrate se
## 0 0 0.8504018 0.003198709
## 12540 12540 0.8597531 0.003899065
## 17020 17020 0.8090850 0.007060367
## 20940 20940 0.8978229 0.007534780
## 23420 23420 0.8602213 0.003343725
## 25260 25260 0.8107385 0.009147284
head(met_est_marital)
## met2013 marital se
## 0 0 1.566685 0.003969696
## 12540 12540 1.630053 0.003954311
## 17020 17020 1.612303 0.007298586
## 20940 20940 1.671704 0.008742750
## 23420 23420 1.649251 0.003359611
## 25260 25260 1.625776 0.008821617
head(met_est_age)
## met2013 agecat[0,18] agecat(18,20] agecat(20,30] agecat(30,40]
## 0 0 0.1983414 0.01761420 0.09289516 0.1129798
## 12540 12540 0.2961338 0.02712393 0.14361247 0.1347848
## 17020 17020 0.2097374 0.03050036 0.14508174 0.1114879
## 20940 20940 0.2824310 0.02981515 0.15102420 0.1289902
## 23420 23420 0.2874886 0.02659951 0.14119844 0.1361487
## 25260 25260 0.2861692 0.02119103 0.14808279 0.1554283
## agecat(40,50] agecat(50,65] agecat(65,120] se.agecat[0,18]
## 0 0.1149168 0.2375264 0.2257263 0.003490344
## 12540 0.1212640 0.1684736 0.1086074 0.003951582
## 17020 0.1091705 0.2046513 0.1893708 0.006525106
## 20940 0.1202095 0.1603110 0.1272189 0.009348781
## 23420 0.1179922 0.1683697 0.1222029 0.003398160
## 25260 0.1234529 0.1599422 0.1057336 0.008396426
## se.agecat(18,20] se.agecat(20,30] se.agecat(30,40] se.agecat(40,50]
## 0 0.001049319 0.002645495 0.002474986 0.002472711
## 12540 0.001331334 0.003100665 0.002722308 0.002576628
## 17020 0.003324332 0.006707896 0.004759546 0.004487834
## 20940 0.002920932 0.006859031 0.005998213 0.005473055
## 23420 0.001031444 0.002698786 0.002360374 0.002190485
## 25260 0.002487736 0.006829756 0.006602807 0.005757457
## se.agecat(50,65] se.agecat(65,120]
## 0 0.003468241 0.003485648
## 12540 0.003111805 0.002511580
## 17020 0.006366536 0.006016907
## 20940 0.006533775 0.006381010
## 23420 0.002680021 0.002312830
## 25260 0.006637342 0.005741427
mets$met2013<-as.numeric(mets$GEOID)
geo3<-geo_join(mets, met_est_samehouse,by_sp= "met2013",by_df= "met2013")
Note, grey Metros are ones that are not identified in the ACS
tmap_mode("view")
## tmap mode set to interactive viewing
tm_basemap("OpenStreetMap.Mapnik")+
tm_shape(geo3)+
tm_polygons("migrate",
style="kmeans",
n=8,
legend.hist = TRUE) +
tm_layout(legend.outside = TRUE,
title = "Percent of population who live in the same house as last year in California Metro Areas 2019")
mets$met2013<-as.numeric(mets$GEOID)
geo3<-geo_join(mets, met_est_marital,by_sp= "met2013",by_df= "met2013")
tmap_mode("view")
## tmap mode set to interactive viewing
tm_basemap("OpenStreetMap.Mapnik")+
tm_shape(geo3)+
tm_polygons("marital",
style="kmeans",
n=8,
legend.hist = TRUE) +
tm_layout(legend.outside = TRUE,
title = "Percent of population who live in the same house as last year who are married in California Metro Areas 2019")