Migration Rates Exploration

Set libraries and directories

library(ipumsr)

## Warning: package 'ipumsr' was built under R version 4.1.3

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.1.3

## -- Attaching packages --------------------------------------- tidyverse 1.3.2 --
## v ggplot2 3.4.0      v purrr   1.0.1 
## v tibble  3.1.8      v dplyr   1.0.10
## v tidyr   1.2.1      v stringr 1.5.0 
## v readr   2.1.3      v forcats 0.5.2

## Warning: package 'ggplot2' was built under R version 4.1.3

## Warning: package 'tibble' was built under R version 4.1.3

## Warning: package 'tidyr' was built under R version 4.1.3

## Warning: package 'readr' was built under R version 4.1.3

## Warning: package 'purrr' was built under R version 4.1.3

## Warning: package 'dplyr' was built under R version 4.1.3

## Warning: package 'stringr' was built under R version 4.1.3

## Warning: package 'forcats' was built under R version 4.1.3

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(knitr)

## Warning: package 'knitr' was built under R version 4.1.3

library(scales)

## Warning: package 'scales' was built under R version 4.1.3

## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor

# Number formatting for printed output: a large scipen strongly favours fixed
# over scientific notation, and digits = 5 limits printed significant digits.
options(digits = 5, scipen = 99)

# Project-relative folders. Edit these two paths if your data file or plots
# live elsewhere; both are subdirectories of the R project that holds the
# .Rmd file.
dataDir  <- "./data"   # input: directory the data file is read from
dataDir2 <- "./plots"  # output: directory plot files are saved to

Read the data

# Parse the DDI codebook (XML) for the ACS 2019 extract, then use it to load
# the matching microdata.
ddi <- file.path(dataDir, "usa_00001.xml") %>%
  read_ipums_ddi()
data <- ddi %>%
  read_ipums_micro()

## Use of data from IPUMS USA is subject to conditions including that users should
## cite the data appropriately. Use command `ipums_conditions()` for more details.

data %>% head() %>% kable()

YEAR	SAMPLE	SERIAL	CBSERIAL	HHWT	CLUSTER	STATEFIP	MET2013	STRATA	GQ	PERNUM	PERWT	SEX	AGE	RACE	RACED	BPL	BPLD	RACAMIND	RACASIAN	RACBLK	RACPACIS	RACWHT	RACOTHER	RACNUM	EDUC	EDUCD	MIGRATE1	MIGRATE1D	MIGPLAC1	MIGMET131
2019	201901	1	2019010000088	11	2019000000011	1	0	220001	4	1	11	1	39	2	200	1	100	1	1	2	1	1	1	1	4	40	1	10	0	0
2019	201901	2	2019010000096	70	2019000000021	1	0	100001	3	1	70	2	21	1	100	13	1300	1	1	1	1	2	1	1	4	40	2	23	1	0
2019	201901	3	2019010000153	20	2019000000031	1	11500	110001	4	1	20	1	19	2	200	1	100	1	1	2	1	1	1	1	7	71	2	24	1	0
2019	201901	4	2019010000198	79	2019000000041	1	11500	110001	3	1	79	1	77	1	100	1	100	1	1	1	1	2	1	1	3	30	2	23	1	11500
2019	201901	5	2019010000205	53	2019000000051	1	33660	270101	3	1	53	1	41	2	200	1	100	1	1	2	1	1	1	1	3	30	2	23	1	33660
2019	201901	6	2019010000215	77	2019000000061	1	33860	200001	4	1	77	1	18	2	200	26	2600	1	1	2	1	1	1	1	6	65	3	32	26	19820

# Column names of the extract, collected in a one-column data frame
varname <- as.data.frame(colnames(data))

# Pull the "label" attribute off every column. seq_len() is used instead of
# 1:n so the index is empty (rather than c(1, 0)) if `data` has no columns.
n <- ncol(data)
labels_list <- map(seq_len(n), function(x) attr(data[[x]], "label"))

# Same labels as a character vector. map_chr() requires exactly one string per
# column, so it will error if any column lacks a "label" attribute.
labels_vector <- map_chr(seq_len(n), function(x) attr(data[[x]], "label"))

# List the top-level fields stored in the DDI codebook object
names(ddi)

##  [1] "file_name"        "file_path"        "file_type"        "ipums_project"   
##  [5] "extract_date"     "extract_notes"    "rectypes"         "rectype_idvar"   
##  [9] "rectypes_keyvars" "var_info"         "conditions"       "citation"        
## [13] "file_encoding"

# Name of the data file that this DDI describes
ddi$file_name

## [1] "usa_00001.dat"

# Notes recorded with the extract (here, the user-provided description)
ddi$extract_notes

## [1] "User-provided description:  2019 ACS migration"

# Per-variable metadata table: name, label, description, value labels,
# column positions and type for every variable in the extract
ddi$var_info

## # A tibble: 34 x 10
##    var_name var_l~1 var_d~2 val_la~3 code_~4 start   end imp_d~5 var_t~6 recty~7
##    <chr>    <chr>   <chr>   <list>   <chr>   <dbl> <dbl>   <dbl> <chr>   <lgl>  
##  1 YEAR     Census~ "YEAR ~ <tibble>  <NA>       1     4       0 integer NA     
##  2 SAMPLE   IPUMS ~ "SAMPL~ <tibble>  <NA>       5    10       0 integer NA     
##  3 SERIAL   Househ~ "SERIA~ <tibble> "Codes~    11    18       0 numeric NA     
##  4 CBSERIAL Origin~ "CBSER~ <tibble> "Codes~    19    31       0 numeric NA     
##  5 HHWT     Househ~ "HHWT ~ <tibble> "Codes~    32    41       2 numeric NA     
##  6 CLUSTER  Househ~ "CLUST~ <tibble> "Codes~    42    54       0 numeric NA     
##  7 STATEFIP State ~ "STATE~ <tibble>  <NA>      55    56       0 integer NA     
##  8 MET2013  Metrop~ "A met~ <tibble>  <NA>      57    61       0 integer NA     
##  9 STRATA   Househ~ "STRAT~ <tibble> "Codes~    62    73       0 numeric NA     
## 10 GQ       Group ~ "GQ cl~ <tibble>  <NA>      74    74       0 integer NA     
## # ... with 24 more rows, and abbreviated variable names 1: var_label,
## #   2: var_desc, 3: val_labels, 4: code_instr, 5: imp_decim, 6: var_type,
## #   7: rectypes

# Details on one variable in the extract: the variable label for MIGRATE1
ipums_var_label(ddi, MIGRATE1)

## [1] "Migration status, 1 year [general version]"

# Value labels for MIGRATE1: each numeric code and the category it stands for
ipums_val_labels(ddi, MIGRATE1)

## # A tibble: 6 x 2
##     val lbl                 
##   <dbl> <chr>               
## 1     0 N/A                 
## 2     1 Same house          
## 3     2 Moved within state  
## 4     3 Moved between states
## 5     4 Abroad one year ago 
## 6     9 Unknown

# Lookup table of the STATEFIP codes and their labels. It is not used later in
# this script, but it is easy to read and useful for later analysis.
STATEFIP <- ddi %>% ipums_val_labels(STATEFIP)

# Add the STATEFIP labels to the data as a factor column named STATENAME
data[["STATENAME"]] <- as_factor(data[["STATEFIP"]])

# Interactively browse variables, labels/descriptions and values; this
# generates a web page
ipums_view(ddi)

# file name

Compute Unweighted Migration Rate

This code block computes the percentage of the US population that moved between states, based on the 2019 ACS sample, without applying the person weight (the weight reflects how much of the population each record represents). The resulting migration rate is 2.26%.

# National interstate migration rate from raw (unweighted) sample counts.
# Only records with AGE >= 1 are kept. Rows are counted directly with n() and
# a logical sum, instead of first adding a helper column of ones to every
# record and summing it.
migrate_national_unweighted <- data %>%
  filter(AGE >= 1) %>%
  summarise(
    pop = n(),                               # records in the filtered sample
    interstate_movers = sum(MIGRATE1 == 3)   # MIGRATE1 code 3: moved between states
  ) %>%
  mutate(interstate_mig_rate = interstate_movers / pop)

# The summary has a single row, so it is printed without head()
kable(migrate_national_unweighted)

pop	interstate_movers	interstate_mig_rate
3210231	72688	0.02264

Compute Weighted Migration Rate

This code block computes the percentage of the US population that moved between states, based on the 2019 ACS sample, this time applying the person weight, which gives a more accurate estimate of US interstate migration in 2019. The resulting migration rate is 2.31%.

# National interstate migration rate using person weights: PERWT is summed
# over all records with AGE >= 1 and over those with MIGRATE1 == 3 (moved
# between states), and the rate is the ratio of the two totals.
migrate_national_weighted <- data %>%
  filter(AGE >= 1) %>%
  summarise(
    pop = sum(PERWT),
    interstate_movers = sum(PERWT[MIGRATE1 == 3]),
    interstate_mig_rate = interstate_movers / pop
  )

migrate_national_weighted %>%
  head() %>%
  kable()

pop	interstate_movers	interstate_mig_rate
324737606	7501876	0.0231

Compute Weighted Migration Rates by Gender/Sex

This code block computes the percentage of interstate movers among males and females in the US in 2019, applying the person weight. Males (SEX = 1) have a migration rate of 2.38%; females (SEX = 2) have a migration rate of 2.25%, slightly lower than that of males.

# Weighted interstate migration rate for each value of SEX.
# Within each group, PERWT is totalled for everyone and for interstate movers
# (MIGRATE1 == 3); the rate is computed in the same summarise() call.
migrate_national_sex <- data %>%
  filter(AGE >= 1) %>%
  group_by(SEX) %>%
  summarise(
    pop = sum(PERWT),
    interstate_movers = sum(PERWT[MIGRATE1 == 3]),
    interstate_mig_rate = interstate_movers / pop
  )

migrate_national_sex %>%
  head() %>%
  kable()

SEX	pop	interstate_movers	interstate_mig_rate
1	159795828	3796339	0.02376
2	164941778	3705537	0.02247

Compute Weighted Migration Rates by Age

This code block computes the percentage of US interstate movers at each single year of age (ages 1 and older) in 2019, applying the person weight. Below are the migration rates for the first six ages.

# Weighted interstate migration rate for each value of AGE (AGE >= 1 only).
# One output row per age: weighted population, weighted interstate movers
# (MIGRATE1 == 3), and their ratio.
migrate_age <- data %>%
  filter(AGE >= 1) %>%
  group_by(AGE) %>%
  summarise(
    pop = sum(PERWT),
    interstate_movers = sum(PERWT[MIGRATE1 == 3]),
    interstate_mig_rate = interstate_movers / pop
  )

# Show only the first six ages
migrate_age %>%
  head() %>%
  kable()

AGE	pop	interstate_movers	interstate_mig_rate
1	3777331	114018	0.03018
2	3913567	118238	0.03021
3	4016207	94121	0.02344
4	4106801	94470	0.02300
5	3880268	91160	0.02349
6	3869326	80178	0.02072

Plot migration rates by age: a migration schedule by age

A clear pattern

# Raw age schedule: one point per age, connected by a line
migrate_age %>%
  ggplot(aes(AGE, interstate_mig_rate)) +
  geom_line() +
  geom_point() +
  scale_x_continuous("Age") +
  scale_y_continuous("Migration Rate")

Smooth the age schedule, add title etc, plot using percentages

Get rid of the jerky trend line and replace with a smoothed one. It does not capture the extremes but does show the general trends.

# Smoothed age schedule with the rate shown as a percentage.
# The y breaks now stop at 0.06 to match the axis limits; the previous upper
# bound of 0.6 was a typo that requested breaks far beyond the plotted range.
# NOTE: setting `limits` on a scale discards observations outside the range,
# which is why ggplot2 warns about removed rows for this plot.
ggplot(data = migrate_age, aes(x = AGE, y = interstate_mig_rate)) +
  geom_point() +
  geom_smooth(span = 0.24) +
  scale_x_continuous(
    name = "Age",
    limits = c(0, 100),
    breaks = seq(0, 100, by = 5)
  ) +
  scale_y_continuous(
    name = "Migration Rate",
    limits = c(0, 0.06),
    breaks = seq(0, 0.06, by = 0.01),
    labels = percent
  ) +
  labs(title = "Age Schedule for US Interstate Migration") +
  theme(text = element_text(size = 14))

## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

## Warning: Removed 2 rows containing non-finite values (`stat_smooth()`).

## Warning: Removed 2 rows containing missing values (`geom_point()`).

Compute Migration Rate by Race

This code block computes the percentage of US interstate movers for each racial/ethnic group (White, Black, American Indian, Asian, Other, Mixed, and Latinx) in 2019, applying the person weight.

# Weighted interstate migration rate by race/ethnicity (AGE >= 1 only).
# Anyone with HISPAN > 0 is labelled "Latinx" regardless of RACE; everyone
# with HISPAN == 0 is labelled from their RACE code. The branches are mutually
# exclusive, and records matching none of them get NA.
migrate_race <- data %>%
  filter(AGE >= 1) %>%
  mutate(
    RACE_ETHNIC = case_when(
      HISPAN > 0 ~ "Latinx",
      HISPAN == 0 & RACE == 1 ~ "White",
      HISPAN == 0 & RACE == 2 ~ "Black",
      HISPAN == 0 & RACE == 3 ~ "American Indian",
      HISPAN == 0 & (RACE >= 4 & RACE <= 6) ~ "Asian",
      HISPAN == 0 & RACE == 7 ~ "Other",
      HISPAN == 0 & RACE >= 8 ~ "Mixed"
    )
  ) %>%
  group_by(RACE_ETHNIC) %>%
  summarise(
    pop = sum(PERWT),
    interstate_movers = sum(PERWT[MIGRATE1 == 3]),
    interstate_mig_rate = interstate_movers / pop
  )

migrate_race %>% kable()

RACE_ETHNIC	pop	interstate_movers	interstate_mig_rate
American Indian	2187817	54078	0.02472
Asian	18681193	503396	0.02695
Black	40193456	867732	0.02159
Latinx	59604788	1037598	0.01741
Mixed	8224028	268469	0.03264
Other	817627	14735	0.01802
White	195028697	4755868	0.02439

The table suggests that some minority groups have a relatively lower rate of interstate migration. One possible explanation is that minority groups are less dispersed and more geographically concentrated, because of a need to stay attached to their co-ethnic communities. For members of these groups, moving to a different state means not only facing new social networks and working/living environments, but also having to find a local co-ethnic community, which could be difficult in some states. For this reason, minority groups might be less inclined to move.

Migration Rates by Age for Each Racial Group

This code block computes the percentage of US interstate movers for each racial group by age in 2019, counting in person weight.

# Weighted interstate migration rate for every AGE x RACE_ETHNIC combination
# (AGE >= 1 only). Anyone with HISPAN > 0 is "Latinx"; everyone with
# HISPAN == 0 is labelled from their RACE code.
migrate_age_race <- data %>%
  filter(AGE >= 1) %>%
  mutate(RACE_ETHNIC = case_when(
    HISPAN == 0 & RACE == 1 ~ "White",
    HISPAN == 0 & RACE == 2 ~ "Black",
    HISPAN == 0 & RACE == 3 ~ "American Indian",
    HISPAN == 0 & (RACE >= 4 & RACE <= 6) ~ "Asian",
    HISPAN == 0 & RACE == 7 ~ "Other",
    HISPAN == 0 & RACE >= 8 ~ "Mixed",
    HISPAN > 0 ~ "Latinx"
  )) %>%
  group_by(AGE, RACE_ETHNIC) %>%
  summarise(
    pop = sum(PERWT),
    interstate_movers = sum(PERWT[MIGRATE1 == 3]),
    # Drop all grouping explicitly: by default summarise() would leave the
    # result grouped by AGE and print a message about it.
    .groups = "drop"
  ) %>%
  mutate(interstate_mig_rate = interstate_movers / pop)

## `summarise()` has grouped output by 'AGE'. You can override using the `.groups`
## argument.

migrate_age_race %>% head() %>% kable()

AGE	RACE_ETHNIC	pop	interstate_movers	interstate_mig_rate
1	American Indian	26547	1135	0.04275
1	Asian	170907	4570	0.02674
1	Black	494926	13377	0.02703
1	Latinx	984859	25604	0.02600
1	Mixed	228702	6664	0.02914
1	Other	16941	241	0.01423

Plot migration rates by age for each racial group: Age Schedule for US Interstate Migration for Each Racial Group

# Smoothed age schedule per racial/ethnic group, rate shown as a percentage.
# Only the smoothed lines are coloured by RACE_ETHNIC; the points are drawn in
# a single colour for all groups.
# The y breaks now stop at 0.06 to match the axis limits; the previous upper
# bound of 0.6 was a typo that requested breaks far beyond the plotted range.
# NOTE: setting `limits` on a scale discards observations outside the range,
# which is why ggplot2 warns about removed rows for this plot.
ggplot(data = migrate_age_race, aes(x = AGE, y = interstate_mig_rate)) +
  geom_smooth(aes(color = RACE_ETHNIC), span = 0.24) +
  geom_point(size = 0.7) +
  scale_x_continuous(
    name = "Age",
    limits = c(0, 100),
    breaks = seq(0, 100, by = 5)
  ) +
  scale_y_continuous(
    name = "Migration Rate",
    limits = c(0, 0.06),
    breaks = seq(0, 0.06, by = 0.01),
    labels = percent
  ) +
  labs(title = "Age Schedule for US Interstate Migration for Each Race") +
  theme(text = element_text(size = 14))

## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

## Warning: Removed 18 rows containing non-finite values (`stat_smooth()`).

## Warning: Removed 8 rows containing missing values (`geom_smooth()`).

## Warning: Removed 18 rows containing missing values (`geom_point()`).