IPUMS estimates by PUMA in California for 2019

library(ipumsr)
## Warning: package 'ipumsr' was built under R version 4.0.5
ddi <- read_ipums_ddi("C:/Users/canda/Documents/DEM7093 gis_class/usa_00019.xml")
data <- read_ipums_micro(ddi)
## Use of data from IPUMS USA is subject to conditions including that users should
## cite the data appropriately. Use command `ipums_conditions()` for more details.
data<-haven::zap_labels(data) #necessary to avoid problems with "labeled" data
library(survey)
library(dplyr)
library(car)
library(ggplot2)
library(tigris)
library(classInt)
library(tmap)

Download geographic data for Public Use Microdata Areas

options(tigris_class = "sf")
pumas<-pumas(state = "CA", year = 2019, cb = T)
plot(pumas["GEOID10"], main = "Public Use Microdata Areas in California")

names(data)<-tolower(names(data))

Prepare variables

Here I recode several demographic variables

#weight variables, these have implied decimal places so we have to divde by 100, following the codebook
data$pwt <- data$perwt/100
data$hwt <- data$hhwt/100

#race/ethnicity
data$hisp <- Recode(data$hispan, recodes = "9=NA; 1:4='Hispanic'; 0='NonHispanic'")
data$race_rec <- Recode(data$race, recodes = "1='White'; 2='Black'; 3='Other'; 4:6='Asian'; 7:9='Other'")
data$race_eth <- interaction(data$hisp, data$race_rec, sep = "_")
data$race_eth  <- as.factor(ifelse(substr(as.character(data$race_eth),1,8) == "Hispanic", "Hispanic", as.character(data$race_eth)))
data$race_eth <- relevel(data$race_eth, ref = "NonHispanic_White")

#sex
data$male <- ifelse(data$sex == 1,1,0)

#migration
data$migrate<- Recode(data$migrate1, recodes = "1=1; 2:9=0; else=NA")

#employed
data$employed <- Recode(data$empstat, recodes = "1=1; 2:3=0; else=NA")


#maritalstatus
data$marital<-Recode(data$marst, recodes = "1:2=1; 3:6=2; else=NA")


#age in 10 year intervals
data$agecat<-cut(data$age, breaks = c(0, 18, 20, 30, 40, 50, 65, 120), include.lowest = T)

Generate survey design object

Here we identify the person weights and the survey design variables.

des<-svydesign(ids = ~cluster,
               strata = ~ strata,
               weights = ~pwt,
               data = data)

Perform survey estimation for PUMAs

The svyby() function allows us calculate estimates for different sub-domains within the data, this could be a demographic characteristic, but we’ll use our geographic level.

puma_est_samehouse<-svyby(formula = ~migrate,
                    by = ~puma,
                    design = subset(des, age%in%18:120),
                    FUN=svymean,
                    na.rm = TRUE )

puma_est_marital<-svyby(formula = ~marital,
                       by = ~puma,
                       design=subset(des, migrate==1),
                       FUN=svymean,
                       na.rm = TRUE )

puma_est_age<-svyby(formula = ~agecat,
                         by = ~puma,
                         design = subset(des, migrate==1),
                         FUN = svymean,
                         na.rm = TRUE )

head(puma_est_samehouse)
##     puma   migrate          se
## 101  101 0.7251632 0.009861916
## 102  102 0.8143756 0.006928236
## 103  103 0.8691502 0.006591523
## 104  104 0.8920334 0.007478713
## 105  105 0.8860817 0.006765669
## 106  106 0.9215227 0.005682131
head(puma_est_marital)
##     puma  marital          se
## 101  101 1.632448 0.008319243
## 102  102 1.703854 0.007515933
## 103  103 1.552394 0.008134880
## 104  104 1.696991 0.008505672
## 105  105 1.587970 0.007587832
## 106  106 1.593637 0.008371951
head(puma_est_age)
##     puma agecat[0,18] agecat(18,20] agecat(20,30] agecat(30,40] agecat(40,50]
## 101  101    0.1725258    0.03542281    0.17715238     0.1352446     0.1302636
## 102  102    0.1600313    0.01496574    0.17676472     0.2062596     0.1384031
## 103  103    0.1852725    0.01146686    0.08474033     0.1411992     0.1636129
## 104  104    0.2740301    0.02835397    0.15337791     0.1587323     0.1316024
## 105  105    0.2014527    0.01793048    0.11963631     0.1436130     0.1445112
## 106  106    0.2214664    0.01626269    0.12485230     0.1532950     0.1345402
##     agecat(50,65] agecat(65,120] se.agecat[0,18] se.agecat(18,20]
## 101     0.1811851     0.16820568     0.006222725      0.003886460
## 102     0.1723630     0.13121254     0.006281255      0.001646195
## 103     0.2278550     0.18585321     0.006311993      0.001604888
## 104     0.1573250     0.09657837     0.008284810      0.002779268
## 105     0.2234704     0.14938594     0.006199349      0.001965251
## 106     0.2136722     0.13591123     0.007097539      0.001966783
##     se.agecat(20,30] se.agecat(30,40] se.agecat(40,50] se.agecat(50,65]
## 101      0.008231637      0.005983659      0.005214156      0.006571189
## 102      0.006498034      0.006305241      0.004855648      0.005809187
## 103      0.005095964      0.005915746      0.005989422      0.007131297
## 104      0.007102748      0.006279054      0.005745837      0.006443300
## 105      0.005782855      0.005625599      0.005154400      0.006608431
## 106      0.006123443      0.006287610      0.005611794      0.007049252
##     se.agecat(65,120]
## 101       0.006296592
## 102       0.005185209
## 103       0.006736860
## 104       0.005005448
## 105       0.005610579
## 106       0.005681810

Join to geography

pumas$puma<-as.numeric(pumas$PUMACE10)

geo1<-geo_join(pumas, puma_est_samehouse, by_sp="puma",by_df= "puma")
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
head(geo1)
## Simple feature collection with 6 features and 12 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -124.4096 ymin: 33.46296 xmax: -116.8412 ymax: 41.46584
## geographic CRS: NAD83
##   STATEFP10 PUMACE10       AFFGEOID10 GEOID10
## 1        06    09702 7950000US0609702 0609702
## 2        06    02300 7950000US0602300 0602300
## 3        06    08506 7950000US0608506 0608506
## 4        06    06506 7950000US0606506 0606506
## 5        06    08900 7950000US0608900 0608900
## 6        06    06103 7950000US0606103 0606103
##                                                                     NAME10
## 1            Sonoma County (South)--Petaluma, Rohnert Park & Cotati Cities
## 2                                                          Humboldt County
## 3 Santa Clara County (East)--Gilroy, Morgan Hill & San Jose (South) Cities
## 4                    Riverside County (Southwest)--Hemet City & East Hemet
## 5                                              Shasta County--Redding City
## 6         Placer County (East/High Country Region)--Auburn & Colfax Cities
##   LSAD10    ALAND10   AWATER10 puma   migrate          se rank
## 1     P0  344472696    7555813 9702 0.8469285 0.009115654    1
## 2     P0 9241426488 1253864712 2300 0.7974411 0.009421146    1
## 3     P0 2152449674   13432167 8506 0.8787603 0.008735708    1
## 4     P0  645481741    4500965 6506 0.8371582 0.009382898    1
## 5     P0 9778407493  186302040 8900 0.8660663 0.006837850    1
## 6     P0 3094034997  246117939 6103 0.8821970 0.008108159    1
##                         geometry
## 1 MULTIPOLYGON (((-122.7418 3...
## 2 MULTIPOLYGON (((-124.4086 4...
## 3 MULTIPOLYGON (((-121.8558 3...
## 4 MULTIPOLYGON (((-117.1456 3...
## 5 MULTIPOLYGON (((-123.0688 4...
## 6 MULTIPOLYGON (((-121.4104 3...
geo2<-geo_join(pumas, puma_est_marital, by_sp="puma",by_df= "puma")
head(geo2)
## Simple feature collection with 6 features and 12 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -124.4096 ymin: 33.46296 xmax: -116.8412 ymax: 41.46584
## geographic CRS: NAD83
##   STATEFP10 PUMACE10       AFFGEOID10 GEOID10
## 1        06    09702 7950000US0609702 0609702
## 2        06    02300 7950000US0602300 0602300
## 3        06    08506 7950000US0608506 0608506
## 4        06    06506 7950000US0606506 0606506
## 5        06    08900 7950000US0608900 0608900
## 6        06    06103 7950000US0606103 0606103
##                                                                     NAME10
## 1            Sonoma County (South)--Petaluma, Rohnert Park & Cotati Cities
## 2                                                          Humboldt County
## 3 Santa Clara County (East)--Gilroy, Morgan Hill & San Jose (South) Cities
## 4                    Riverside County (Southwest)--Hemet City & East Hemet
## 5                                              Shasta County--Redding City
## 6         Placer County (East/High Country Region)--Auburn & Colfax Cities
##   LSAD10    ALAND10   AWATER10 puma  marital          se rank
## 1     P0  344472696    7555813 9702 1.576687 0.009267779    1
## 2     P0 9241426488 1253864712 2300 1.620597 0.009934291    1
## 3     P0 2152449674   13432167 8506 1.568378 0.008900062    1
## 4     P0  645481741    4500965 6506 1.621994 0.008443271    1
## 5     P0 9778407493  186302040 8900 1.556218 0.007571657    1
## 6     P0 3094034997  246117939 6103 1.482881 0.009546415    1
##                         geometry
## 1 MULTIPOLYGON (((-122.7418 3...
## 2 MULTIPOLYGON (((-124.4086 4...
## 3 MULTIPOLYGON (((-121.8558 3...
## 4 MULTIPOLYGON (((-117.1456 3...
## 5 MULTIPOLYGON (((-123.0688 4...
## 6 MULTIPOLYGON (((-121.4104 3...
geo3<-geo_join(pumas, puma_est_age, by_sp="puma", by_df= "puma")
head(geo3)
## Simple feature collection with 6 features and 24 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -124.4096 ymin: 33.46296 xmax: -116.8412 ymax: 41.46584
## geographic CRS: NAD83
##   STATEFP10 PUMACE10       AFFGEOID10 GEOID10
## 1        06    09702 7950000US0609702 0609702
## 2        06    02300 7950000US0602300 0602300
## 3        06    08506 7950000US0608506 0608506
## 4        06    06506 7950000US0606506 0606506
## 5        06    08900 7950000US0608900 0608900
## 6        06    06103 7950000US0606103 0606103
##                                                                     NAME10
## 1            Sonoma County (South)--Petaluma, Rohnert Park & Cotati Cities
## 2                                                          Humboldt County
## 3 Santa Clara County (East)--Gilroy, Morgan Hill & San Jose (South) Cities
## 4                    Riverside County (Southwest)--Hemet City & East Hemet
## 5                                              Shasta County--Redding City
## 6         Placer County (East/High Country Region)--Auburn & Colfax Cities
##   LSAD10    ALAND10   AWATER10 puma agecat[0,18] agecat(18,20] agecat(20,30]
## 1     P0  344472696    7555813 9702    0.2133562    0.02668606    0.11205122
## 2     P0 9241426488 1253864712 2300    0.1990870    0.02432914    0.11406488
## 3     P0 2152449674   13432167 8506    0.2656354    0.01999496    0.10660869
## 4     P0  645481741    4500965 6506    0.2786463    0.02874110    0.10612814
## 5     P0 9778407493  186302040 8900    0.2098500    0.01657230    0.10726616
## 6     P0 3094034997  246117939 6103    0.1863977    0.01659084    0.07666723
##   agecat(30,40] agecat(40,50] agecat(50,65] agecat(65,120] se.agecat[0,18]
## 1    0.12777390     0.1320894     0.2209579      0.1670853     0.008127629
## 2    0.13029358     0.1218869     0.2221820      0.1881565     0.009372130
## 3    0.12728477     0.1449845     0.2017410      0.1337507     0.008254944
## 4    0.12635495     0.1139341     0.1805173      0.1656781     0.008649513
## 5    0.11569041     0.1083184     0.2331367      0.2091661     0.006736311
## 6    0.09068209     0.1246731     0.2745334      0.2304557     0.009687839
##   se.agecat(18,20] se.agecat(20,30] se.agecat(30,40] se.agecat(40,50]
## 1      0.003320595      0.006293752      0.006383677      0.006183066
## 2      0.002994499      0.008404586      0.006774885      0.006578975
## 3      0.002343505      0.006468280      0.006296938      0.006435476
## 4      0.003032720      0.005798519      0.006030991      0.005585108
## 5      0.001869133      0.005258244      0.005161068      0.004928292
## 6      0.002282940      0.006386898      0.005670959      0.006032153
##   se.agecat(50,65] se.agecat(65,120] rank                       geometry
## 1      0.008017311       0.007063306    1 MULTIPOLYGON (((-122.7418 3...
## 2      0.008988799       0.008129300    1 MULTIPOLYGON (((-124.4086 4...
## 3      0.007825928       0.006837918    1 MULTIPOLYGON (((-121.8558 3...
## 4      0.007091173       0.007018028    1 MULTIPOLYGON (((-117.1456 3...
## 5      0.006763043       0.006433814    1 MULTIPOLYGON (((-123.0688 4...
## 6      0.009215991       0.009128643    1 MULTIPOLYGON (((-121.4104 3...

Map estimates

Same house rates by PUMA

tmap_mode("view")
## tmap mode set to interactive viewing
tm_basemap("OpenStreetMap.Mapnik")+
  tm_shape(geo1)+
  tm_polygons("migrate",
              style="kmeans",
              n=8,
              legend.hist = TRUE) +
  tm_layout(legend.outside = TRUE,
            title = "Percent of population who live in the same house as last year, 2019") 
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1

Married rates of same house by PUMA

tmap_mode("view")
## tmap mode set to interactive viewing
tm_basemap("OpenStreetMap.Mapnik")+
  tm_shape(geo2)+
  tm_polygons("marital",
              style="kmeans",
              n=8,
              legend.hist = TRUE) +
 tm_layout(legend.outside = TRUE,
            title = "Percent of population who live in the same house as last year who are married, 2019")  

Estimation for metro areas

Here we use core based statistical areas instead of PUMAs

mets<-core_based_statistical_areas(cb = T, year = 2019)
mets<-mets[grep(mets$NAME,pattern =  "CA"),]
plot(mets["NAME"])

sts<-states(cb=T, year=2019)
sts<-sts%>%
  filter(GEOID==06)

estimates by metro area

met_est_samehouse<-svyby(formula = ~migrate,
                   by = ~met2013,
                   design=subset(des, age%in%18:120),
                   FUN=svymean,
                   na.rm=T )

met_est_marital<-svyby(formula = ~marital,
                      by = ~met2013,
                      design=subset(des, migrate==1),
                      FUN=svymean,
                      na.rm=T )

met_est_age<-svyby(formula = ~agecat,
                        by = ~met2013,
                        design=subset(des, migrate==1),
                        FUN=svymean,
                        na.rm=T )

head(met_est_samehouse)
##       met2013   migrate          se
## 0           0 0.8504018 0.003198709
## 12540   12540 0.8597531 0.003899065
## 17020   17020 0.8090850 0.007060367
## 20940   20940 0.8978229 0.007534780
## 23420   23420 0.8602213 0.003343725
## 25260   25260 0.8107385 0.009147284
head(met_est_marital)
##       met2013  marital          se
## 0           0 1.566685 0.003969696
## 12540   12540 1.630053 0.003954311
## 17020   17020 1.612303 0.007298586
## 20940   20940 1.671704 0.008742750
## 23420   23420 1.649251 0.003359611
## 25260   25260 1.625776 0.008821617
head(met_est_age)
##       met2013 agecat[0,18] agecat(18,20] agecat(20,30] agecat(30,40]
## 0           0    0.1983414    0.01761420    0.09289516     0.1129798
## 12540   12540    0.2961338    0.02712393    0.14361247     0.1347848
## 17020   17020    0.2097374    0.03050036    0.14508174     0.1114879
## 20940   20940    0.2824310    0.02981515    0.15102420     0.1289902
## 23420   23420    0.2874886    0.02659951    0.14119844     0.1361487
## 25260   25260    0.2861692    0.02119103    0.14808279     0.1554283
##       agecat(40,50] agecat(50,65] agecat(65,120] se.agecat[0,18]
## 0         0.1149168     0.2375264      0.2257263     0.003490344
## 12540     0.1212640     0.1684736      0.1086074     0.003951582
## 17020     0.1091705     0.2046513      0.1893708     0.006525106
## 20940     0.1202095     0.1603110      0.1272189     0.009348781
## 23420     0.1179922     0.1683697      0.1222029     0.003398160
## 25260     0.1234529     0.1599422      0.1057336     0.008396426
##       se.agecat(18,20] se.agecat(20,30] se.agecat(30,40] se.agecat(40,50]
## 0          0.001049319      0.002645495      0.002474986      0.002472711
## 12540      0.001331334      0.003100665      0.002722308      0.002576628
## 17020      0.003324332      0.006707896      0.004759546      0.004487834
## 20940      0.002920932      0.006859031      0.005998213      0.005473055
## 23420      0.001031444      0.002698786      0.002360374      0.002190485
## 25260      0.002487736      0.006829756      0.006602807      0.005757457
##       se.agecat(50,65] se.agecat(65,120]
## 0          0.003468241       0.003485648
## 12540      0.003111805       0.002511580
## 17020      0.006366536       0.006016907
## 20940      0.006533775       0.006381010
## 23420      0.002680021       0.002312830
## 25260      0.006637342       0.005741427
mets$met2013<-as.numeric(mets$GEOID)
geo3<-geo_join(mets, met_est_samehouse,by_sp= "met2013",by_df= "met2013")

Same house rates by Metro areas

Note, grey Metros are ones that are not identified in the ACS

tmap_mode("view")
## tmap mode set to interactive viewing
tm_basemap("OpenStreetMap.Mapnik")+
  tm_shape(geo3)+
  tm_polygons("migrate",
              style="kmeans",
              n=8,
              legend.hist = TRUE) +
 tm_layout(legend.outside = TRUE,
            title = "Percent of population who live in the same house as last year in California Metro Areas 2019")  
mets$met2013<-as.numeric(mets$GEOID)
geo3<-geo_join(mets, met_est_marital,by_sp= "met2013",by_df= "met2013")

Married rates of same house by Metro areas

tmap_mode("view")
## tmap mode set to interactive viewing
tm_basemap("OpenStreetMap.Mapnik")+
  tm_shape(geo3)+
  tm_polygons("marital",
              style="kmeans",
              n=8,
              legend.hist = TRUE) +
 tm_layout(legend.outside = TRUE,
            title = "Percent of population who live in the same house as last year who are married in California Metro Areas 2019")