GIS HW7

Estimating Population Characteristics using IPUMS USA

library(ipumsr)
cat <- read_ipums_ddi("C:/Users/maman/OneDrive/DEM Spring 2021/DEM 7093 GIS/GIS Data/usa_00006.xml")
data <- read_ipums_micro(cat)

## Use of data from IPUMS USA is subject to conditions including that users should
## cite the data appropriately. Use command `ipums_conditions()` for more details.

data<-haven::zap_labels(data) #necessary to avoid problems with "labeled" data

Additional Packages

library(survey)

## Loading required package: grid

## Loading required package: Matrix

## Loading required package: survival

## 
## Attaching package: 'survey'

## The following object is masked from 'package:graphics':
## 
##     dotchart

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(car)

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

library(ggplot2)
library(tigris)

## To enable 
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.

library(classInt)
library(tmap)

Download geographic data for Public Use Microdata Areas(PUMAS)

options(tigris_class = "sf")
pumas<-pumas(state = "CA", year = 2019, cb = T)

## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |=====                                                                 |   8%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |============================                                          |  41%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |===================================                                   |  51%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |==========================================                            |  61%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================| 100%

plot(pumas["GEOID10"], main = "Public Use Microdata Areas in California")

names(data)<-tolower(names(data)) # Change variable names to lower case

Recode Variables

#weight variables, these have implied decimal places so we have to divde by 100, following the codebook
data$pwt <- data$perwt/100
data$hwt <- data$hhwt/100

#race/ethnicity
data$hisp <- Recode(data$hispan, recodes = "9=NA; 1:4='Hispanic'; 0='NonHispanic'")
data$race_rec <- Recode(data$race, recodes = "1='White'; 2='Black'; 3='Other'; 4:6='Asian'; 7:9='Other'")
data$race_eth <- interaction(data$hisp, data$race_rec, sep = "_")
data$race_eth  <- as.factor(ifelse(substr(as.character(data$race_eth),1,8) == "Hispanic", "Hispanic", as.character(data$race_eth)))
data$race_eth <- relevel(data$race_eth, ref = "NonHispanic_White")

#sex
data$male <- ifelse(data$sex == 1,1,0)

#age in 10 year intervals
data$agecat<-cut(data$age, breaks = c(0, 18, 20, 30, 40, 50, 65, 120), include.lowest = T)

#education
data$educ_level<- Recode(data$educd, recodes = "2:61='0LT_HS';62:64='1_HSD/GED';65:80='2_somecoll';90:100='2_somecoll'; 81:83='3_AssocDegree';101='4_bachelordegree'; 110:116='4_BAplus_GradDegree'; else=NA")

# Migration
data$migr8 <- Recode(data$migrate1, recodes = "1=0; 2:4=1; else=NA")

# Poverty
data$pov <- ifelse(data$poverty <= 100, "at or below",
                   ifelse(data$poverty >100, "above", NA))

data$pov2 <- Recode(data$pov, recodes = "'at or below'=1; 'above'=0; else=NA")

Survey Design Object

dog<-svydesign(ids = ~cluster,
               strata = ~ strata,
               weights = ~pwt,
               data = data)

Survey Estimation for PUMAS

puma_est_edu<-svyby(formula = ~educ_level,
                    by = ~puma,
                    design = subset(dog, age>25),
                    FUN=svymean,
                    na.rm = TRUE )

puma_est_migr8<-svyby(formula = ~migr8,
                    by = ~puma,
                    design = dog,
                    FUN=svymean,
                    na.rm = TRUE )

head(puma_est_edu)

##     puma educ_level0LT_HS educ_level1_HSD/GED educ_level2_somecoll
## 101  101       0.03771076          0.06569605            0.1166131
## 102  102       0.16054167          0.15357159            0.1579987
## 103  103       0.05152757          0.08422222            0.1280499
## 104  104       0.31012651          0.24563983            0.2083160
## 105  105       0.13347136          0.18646181            0.1905626
## 106  106       0.14333595          0.23360220            0.2146757
##     educ_level3_AssocDegree educ_level4_bachelordegree
## 101              0.04322687                  0.3299605
## 102              0.06094609                  0.2850304
## 103              0.05430587                  0.3451345
## 104              0.05510831                  0.1160381
## 105              0.07115222                  0.2688053
## 106              0.07644186                  0.2248899
##     educ_level4_BAplus_GradDegree se.educ_level0LT_HS se.educ_level1_HSD/GED
## 101                    0.40679279         0.003905639            0.004448080
## 102                    0.18191149         0.006521332            0.006326229
## 103                    0.33675991         0.004472209            0.005309984
## 104                    0.06477123         0.011587978            0.009193415
## 105                    0.14954676         0.006885012            0.007559093
## 106                    0.10705441         0.007508320            0.008802248
##     se.educ_level2_somecoll se.educ_level3_AssocDegree
## 101             0.005691604                0.003405890
## 102             0.005625829                0.003542165
## 103             0.005791771                0.004021030
## 104             0.008470772                0.004609917
## 105             0.006335683                0.004081108
## 106             0.007564748                0.004487001
##     se.educ_level4_bachelordegree se.educ_level4_BAplus_GradDegree
## 101                   0.008030658                      0.008809076
## 102                   0.007069620                      0.005803563
## 103                   0.008150823                      0.008192403
## 104                   0.006230842                      0.004944989
## 105                   0.007454848                      0.005514219
## 106                   0.008090311                      0.005358965

head(puma_est_migr8)

##     puma      migr8          se
## 101  101 0.25576024 0.009366589
## 102  102 0.17404355 0.006778309
## 103  103 0.12507773 0.006679989
## 104  104 0.10950199 0.008340314
## 105  105 0.11113104 0.006785526
## 106  106 0.07729941 0.006058942

pumas$puma<-as.numeric(pumas$PUMACE10)

geo1<-geo_join(pumas, puma_est_migr8, by_sp="puma",by_df= "puma")

## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

head(geo1)

## Simple feature collection with 6 features and 12 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -124.4096 ymin: 33.46296 xmax: -116.8412 ymax: 41.46584
## geographic CRS: NAD83
##   STATEFP10 PUMACE10       AFFGEOID10 GEOID10
## 1        06    09702 7950000US0609702 0609702
## 2        06    02300 7950000US0602300 0602300
## 3        06    08506 7950000US0608506 0608506
## 4        06    06506 7950000US0606506 0606506
## 5        06    08900 7950000US0608900 0608900
## 6        06    06103 7950000US0606103 0606103
##                                                                     NAME10
## 1            Sonoma County (South)--Petaluma, Rohnert Park & Cotati Cities
## 2                                                          Humboldt County
## 3 Santa Clara County (East)--Gilroy, Morgan Hill & San Jose (South) Cities
## 4                    Riverside County (Southwest)--Hemet City & East Hemet
## 5                                              Shasta County--Redding City
## 6         Placer County (East/High Country Region)--Auburn & Colfax Cities
##   LSAD10    ALAND10   AWATER10 puma     migr8          se rank
## 1     P0  344472696    7555813 9702 0.1467477 0.009686046    1
## 2     P0 9241426488 1253864712 2300 0.1980057 0.009648508    1
## 3     P0 2152449674   13432167 8506 0.1223848 0.009286238    1
## 4     P0  645481741    4500965 6506 0.1671732 0.010520474    1
## 5     P0 9778407493  186302040 8900 0.1424680 0.007507578    1
## 6     P0 3094034997  246117939 6103 0.1258724 0.009498078    1
##                         geometry
## 1 MULTIPOLYGON (((-122.7418 3...
## 2 MULTIPOLYGON (((-124.4086 4...
## 3 MULTIPOLYGON (((-121.8558 3...
## 4 MULTIPOLYGON (((-117.1456 3...
## 5 MULTIPOLYGON (((-123.0688 4...
## 6 MULTIPOLYGON (((-121.4104 3...

tmap_mode("plot")

## tmap mode set to plotting

Map of Migration Rates in California by PUMAs

tm_basemap("OpenStreetMap.Mapnik")+
  tm_shape(geo1)+
  tm_polygons("migr8",
              style="kmeans",
              palette="PuBu",
              n=8,
              legend.hist = TRUE) +
  tm_layout(legend.outside = TRUE,
            title = "Migration rate in California PUMAs \n 2015-2019")

## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1

GIS HW7

Daniel Mamani

4/15/2021

Estimating Population Characteristics using IPUMS USA

Download geographic data for Public Use Microdata Areas(PUMAS)

Recode Variables

Survey Design Object

Survey Estimation for PUMAS

Map of Migration Rates in California by PUMAs