Multigenerational household estimates

Author

R. Luttinen

#read in data from IPUMS

library(ipumsr)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the IPUMS CPS extract: the DDI codebook (XML) is parsed first and then
# passed to read_ipums_micro(), which reads the gzip-compressed data file.
ddi <- read_ipums_ddi("C:/Users/lutti013/Downloads/cps_00005.xml")
cps <- read_ipums_micro(
  ddi,
  data_file = "C:/Users/lutti013/Downloads/cps_00005.dat.gz"
)
Use of data from IPUMS CPS is subject to conditions including that users should cite the data appropriately. Use command `ipums_conditions()` for more details.
# No separate decompression step is needed: the extract is a gzip file (.gz),
# not a zip archive, so unzip() cannot open it and only raised
# "error 1 in extracting from zip file". read_ipums_micro() already read the
# .dat.gz file directly when `cps` was created.
#2021 analysis

# Keep the 2021 records, then retain one row per (HRHHID, PERNUM) pair with
# all other columns kept from the first occurrence.
# NOTE(review): HRHHID2 is not part of the de-duplication key -- confirm that
# HRHHID alone identifies a household in this extract.
cps21 <- cps %>%
  filter(YEAR == 2021) %>%
  distinct(HRHHID, PERNUM, .keep_all = TRUE)


#make separate dataframes for householder and household members

# Householder rows: RELATE code 101 is the code this script uses to pick out
# the householder record.
cpshouseholder21 <- cps21 %>%
  filter(RELATE == 101)

# Related household members: child (301), stepchild (303), parent (501),
# grandchild (901) and other relatives (1001).
cpsfamilymembers21 <- cps21 %>%
  filter(
    RELATE == 301 | RELATE == 303 | RELATE == 501 |
      RELATE == 901 | RELATE == 1001
  )

# Person-level file for everyone, restricted to the analysis variables.
cpsfull21 <- cps21 %>%
  select(
    CPSID, ASECWT, ASECWTH, YEAR, HRHHID, HRHHID2, PERNUM, RACE, HISPAN,
    NATIVITY, AGE, SEX, RELATE, FTYPE, FAMUNIT, FAMREL, FAMID, MOMLOC,
    MOMLOC2, POPLOC, POPLOC2, HSEQ
  )


# Total of ASECWTH over householder rows, grouped by YEAR (one row per
# householder, so this is the weighted household count used as denominator).
aggfull21 <- with(
  cpshouseholder21,
  aggregate(ASECWTH, by = list(YEAR), FUN = sum)
)

aggfull21
  Group.1         x
1    2021 121318561
#select variables of interest


# Family-member rows restricted to the analysis variables, with RELATE turned
# into a factor and a readable `relationship` label derived from it. Codes
# outside the recode map become NA.
members21 <- cpsfamilymembers21 %>%
  select(
    YEAR, HRHHID, HRHHID2, PERNUM, CPSID, RACE, HISPAN, NATIVITY, AGE, SEX,
    RELATE, FTYPE, FAMUNIT, FAMREL, FAMID, MOMLOC, MOMLOC2, POPLOC, POPLOC2,
    ASECWT, ASECWTH, HSEQ
  ) %>%
  dplyr::filter(RELATE < 9900) %>%
  mutate(
    RELATE = as.factor(RELATE),
    relationship = recode(
      RELATE,
      "301" = "child",
      "303" = "stepchild",
      "501" = "parent",
      "901" = "grandchild",
      "1001" = "other relatives",
      .default = NA_character_
    )
  )


#pivot wider

# One row per household (HRHHID, HRHHID2) with a set of columns per person
# slot: relationship_<PERNUM>, AGE_<PERNUM>, NATIVITY_<PERNUM>, ...
memberswide21 <- pivot_wider(
  members21,
  id_cols = c(HRHHID, HRHHID2),
  names_from = PERNUM,
  values_from = c(relationship, AGE, NATIVITY, RACE, HISPAN)
)

# Householder rows restricted to the analysis variables.
base21 <- cpshouseholder21 %>%
  select(
    YEAR, HRHHID, HRHHID2, PERNUM, CPSID, RACE, HISPAN, NATIVITY, AGE, SEX,
    RELATE, FTYPE, FAMUNIT, FAMREL, FAMID, MOMLOC, MOMLOC2, POPLOC, POPLOC2,
    ASECWT, ASECWTH, HSEQ
  )

# Attach the wide family-member columns to each householder. The join key is
# HRHHID only, so HRHHID2 comes through twice as HRHHID2.x / HRHHID2.y.
# NOTE(review): if HRHHID repeats across HRHHID2 values the join fans out and
# distinct() below keeps whichever match comes first -- confirm this is intended.
multigen21 <- base21 %>%
  left_join(memberswide21, by = "HRHHID") %>%
  distinct(HRHHID, PERNUM, .keep_all = TRUE)
#recode household dataframe 

#relationship to household head


#create a variable for adult children

# adultchild = 1 when any person slot 2-15 holds a "child" or "grandchild"
# aged 25 or older, otherwise 0. Missing relationship/age counts as no match.
# NOTE(review): only slots 2-15 are inspected and "stepchild" is not counted --
# confirm both are intended.
multigen21 <- multigen21 %>%
  dplyr::mutate(
    adultchild = as.numeric(Reduce(`|`, lapply(2:15, function(slot) {
      rel <- .data[[paste0("relationship_", slot)]]
      age <- .data[[paste0("AGE_", slot)]]
      hit <- (rel == "child" | rel == "grandchild") & age >= 25
      !is.na(hit) & hit
    })))
  )

#create a variable for other relatives living in household

# adultrelatives = 1 when any person slot 2-15 holds an "other relatives"
# member aged 25 or older, otherwise 0. The double-space spelling is kept as an
# accepted label exactly as in the original comparison list.
multigen21 <- multigen21 %>%
  dplyr::mutate(
    adultrelatives = as.numeric(Reduce(`|`, lapply(2:15, function(slot) {
      rel <- .data[[paste0("relationship_", slot)]]
      age <- .data[[paste0("AGE_", slot)]]
      hit <- (rel == "other relatives" | rel == "other  relatives") & age >= 25
      !is.na(hit) & hit
    })))
  )


#create a variable for multiple adults in household other than householder: could be a parent or relative 

# multipleadultrelatives = 1 when any relationship_<k> column equals
# "other relatives" and the matching AGE_<k> is above 25, otherwise 0.
#
# The previous apply(multigen21, 1, ...) version coerced the whole data frame
# to a character matrix, so the age test became a string comparison and rows
# without a match returned NA. Here each relationship column is paired with
# its AGE column by name and ages are compared as numbers; missing values
# count as "no match" (0), which is how the downstream case_when() already
# treated NA.
# NOTE(review): the threshold is kept at > 25 although the other flags use
# >= 25, and the flag fires for a single relative despite its name -- confirm.
multigen21$multipleadultrelatives <- local({
  rel_cols <- grep("^relationship_", names(multigen21), value = TRUE)
  age_cols <- sub("^relationship_", "AGE_", rel_cols)
  hits <- mapply(
    function(rel_col, age_col) {
      rel <- as.character(multigen21[[rel_col]])
      age <- as.numeric(multigen21[[age_col]])
      hit <- rel == "other relatives" & age > 25
      !is.na(hit) & hit
    },
    rel_cols, age_cols,
    SIMPLIFY = FALSE
  )
  as.numeric(Reduce(`|`, hits, rep(FALSE, nrow(multigen21))))
})


#create a variable for whether there is a grandchild present regardless of their age

# grandchildpresent = 1 when any person slot 2-15 is a "grandchild" of any
# age, otherwise 0.
multigen21 <- multigen21 %>%
  dplyr::mutate(
    grandchildpresent = as.numeric(Reduce(`|`, lapply(2:15, function(slot) {
      hit <- .data[[paste0("relationship_", slot)]] == "grandchild"
      !is.na(hit) & hit
    })))
  )



# adulthouseholder = 1 when the householder is aged 25 or older, otherwise 0
# (a missing age also yields 0).
multigen21 <- multigen21 %>%
  dplyr::mutate(
    adulthouseholder = dplyr::if_else(AGE >= 25, 1, 0, missing = 0)
  )




#create a variable for multigenerational household

# multigen = 1 when the household qualifies as multigenerational:
#  * householder aged 25+ and any of: an adult child/grandchild, a parent
#    pointer on the householder row (POPLOC/MOMLOC/POPLOC2/MOMLOC2 != 0), an
#    adult other relative, or a grandchild of any age;
#  * householder under 25 and either the multipleadultrelatives flag, or an
#    adult other relative together with a parent pointer.
# Everything else is 0.
multigen21 <- multigen21 %>%
  dplyr::mutate(
    multigen = case_when(
      adulthouseholder == 1 & (
        adultchild == 1 | POPLOC != 0 | MOMLOC != 0 | POPLOC2 != 0 |
          MOMLOC2 != 0 | adultrelatives == 1 | grandchildpresent == 1
      ) ~ 1,
      adulthouseholder == 0 & (
        multipleadultrelatives == 1 | (
          adultrelatives == 1 &
            (POPLOC != 0 | MOMLOC != 0 | POPLOC2 != 0 | MOMLOC2 != 0)
        )
      ) ~ 1,
      TRUE ~ 0
    )
  )


#count of all multigenerational households 


# Households flagged as multigenerational, and their ASECWTH total by YEAR.
multigenfilter21 <- multigen21 %>%
  filter(multigen == 1)

agg21 <- with(
  multigenfilter21,
  aggregate(ASECWTH, by = list(YEAR), FUN = sum)
)

agg21
  Group.1        x
1    2021 15445794
# Share of 2021 households flagged multigenerational: the agg21 total divided
# by the aggfull21 total printed above.
15445794/121318561
[1] 0.127316
#create dataframe with just household ID and multigen tag

# Household-level lookup carrying the multigen flag; HRHHID2.x (the
# householder-side copy) is renamed back to HRHHID2 while selecting.
linking21 <- multigen21 %>%
  select(
    HRHHID, HRHHID2 = HRHHID2.x, HRHHID2.y, CPSID, FAMREL, FAMUNIT, ASECWTH,
    HSEQ, multigen
  )

# Spread the household flag to every person sharing the HRHHID.
# NOTE(review): merging on HRHHID alone duplicates shared column names
# (suffixes .x/.y) and assumes HRHHID is unique per household -- confirm.
all2021 <- merge(linking21, cpsfull21, by = "HRHHID")
Warning in merge.data.frame(linking21, cpsfull21, by = "HRHHID"): column name
'HRHHID2.y' is duplicated in the result
#get agg counts

# Person-weight (ASECWT) totals by multigen flag (0 / 1).
total21 <- with(all2021, aggregate(ASECWT, by = list(multigen), FUN = sum))

total21
  Group.1         x
1       0 249789102
2       1  61461810
# Share of persons living in a multigenerational household in 2021: the
# multigen == 1 total over the sum of both totals printed in total21 above.
# (The denominator previously contained a transposed-digit typo, 64161810,
# which understated the share as 0.1957689.)
(61461810/(249789102+61461810))
[1] 0.1974671
#2023 analysis

# Keep the 2023 records, then retain one row per (HRHHID, PERNUM) pair with
# all other columns kept from the first occurrence.
# NOTE(review): HRHHID2 is not part of the de-duplication key -- confirm that
# HRHHID alone identifies a household in this extract.
cps23 <- cps %>%
  filter(YEAR == 2023) %>%
  distinct(HRHHID, PERNUM, .keep_all = TRUE)


#make separate dataframes for householder and household members

# Householder rows: RELATE code 101 is the code this script uses to pick out
# the householder record.
cpshouseholder23 <- cps23 %>%
  filter(RELATE == 101)

# Related household members: child (301), stepchild (303), parent (501),
# grandchild (901) and other relatives (1001).
cpsfamilymembers23 <- cps23 %>%
  filter(
    RELATE == 301 | RELATE == 303 | RELATE == 501 |
      RELATE == 901 | RELATE == 1001
  )

# Person-level file for everyone, restricted to the analysis variables.
cpsfull23 <- cps23 %>%
  select(
    CPSID, ASECWT, ASECWTH, YEAR, HRHHID, HRHHID2, PERNUM, RACE, HISPAN,
    NATIVITY, AGE, SEX, RELATE, FTYPE, FAMUNIT, FAMREL, FAMID, MOMLOC,
    MOMLOC2, POPLOC, POPLOC2, HSEQ
  )


# Total of ASECWTH over householder rows, grouped by YEAR (one row per
# householder, so this is the weighted household count used as denominator).
aggfull23 <- with(
  cpshouseholder23,
  aggregate(ASECWTH, by = list(YEAR), FUN = sum)
)

aggfull23
  Group.1         x
1    2023 127950630
#select variables of interest


# Family-member rows restricted to the analysis variables, with RELATE turned
# into a factor and a readable `relationship` label derived from it. Codes
# outside the recode map become NA.
members23 <- cpsfamilymembers23 %>%
  select(
    YEAR, HRHHID, HRHHID2, PERNUM, CPSID, RACE, HISPAN, NATIVITY, AGE, SEX,
    RELATE, FTYPE, FAMUNIT, FAMREL, FAMID, MOMLOC, MOMLOC2, POPLOC, POPLOC2,
    ASECWT, ASECWTH, HSEQ
  ) %>%
  dplyr::filter(RELATE < 9900) %>%
  mutate(
    RELATE = as.factor(RELATE),
    relationship = recode(
      RELATE,
      "301" = "child",
      "303" = "stepchild",
      "501" = "parent",
      "901" = "grandchild",
      "1001" = "other relatives",
      .default = NA_character_
    )
  )


#pivot wider

# One row per household (HRHHID, HRHHID2) with a set of columns per person
# slot: relationship_<PERNUM>, AGE_<PERNUM>, NATIVITY_<PERNUM>, ...
memberswide23 <- pivot_wider(
  members23,
  id_cols = c(HRHHID, HRHHID2),
  names_from = PERNUM,
  values_from = c(relationship, AGE, NATIVITY, RACE, HISPAN)
)

# Householder rows restricted to the analysis variables.
base23 <- cpshouseholder23 %>%
  select(
    YEAR, HRHHID, HRHHID2, PERNUM, CPSID, RACE, HISPAN, NATIVITY, AGE, SEX,
    RELATE, FTYPE, FAMUNIT, FAMREL, FAMID, MOMLOC, MOMLOC2, POPLOC, POPLOC2,
    ASECWT, ASECWTH, HSEQ
  )

# Attach the wide family-member columns to each householder. The join key is
# HRHHID only, so HRHHID2 comes through twice as HRHHID2.x / HRHHID2.y.
# NOTE(review): if HRHHID repeats across HRHHID2 values the join fans out and
# distinct() below keeps whichever match comes first -- confirm this is intended.
multigen23 <- base23 %>%
  left_join(memberswide23, by = "HRHHID") %>%
  distinct(HRHHID, PERNUM, .keep_all = TRUE)
#recode household dataframe 

#relationship to household head

#create a variable for adult children

# adultchild = 1 when any person slot 2-15 holds a "child" or "grandchild"
# aged 25 or older, otherwise 0. Missing relationship/age counts as no match.
# NOTE(review): only slots 2-15 are inspected and "stepchild" is not counted --
# confirm both are intended.
multigen23 <- multigen23 %>%
  dplyr::mutate(
    adultchild = as.numeric(Reduce(`|`, lapply(2:15, function(slot) {
      rel <- .data[[paste0("relationship_", slot)]]
      age <- .data[[paste0("AGE_", slot)]]
      hit <- (rel == "child" | rel == "grandchild") & age >= 25
      !is.na(hit) & hit
    })))
  )

#create a variable for other relatives living in household

# adultrelatives = 1 when any person slot 2-15 holds an "other relatives"
# member aged 25 or older, otherwise 0. The double-space spelling is kept as an
# accepted label exactly as in the original comparison list.
multigen23 <- multigen23 %>%
  dplyr::mutate(
    adultrelatives = as.numeric(Reduce(`|`, lapply(2:15, function(slot) {
      rel <- .data[[paste0("relationship_", slot)]]
      age <- .data[[paste0("AGE_", slot)]]
      hit <- (rel == "other relatives" | rel == "other  relatives") & age >= 25
      !is.na(hit) & hit
    })))
  )


#create a variable for whether there is a grandchild in the household regardless if they are an adult or not

# grandchildpresent = 1 when any person slot 2-15 is a "grandchild" of any
# age, otherwise 0.
multigen23 <- multigen23 %>%
  dplyr::mutate(
    grandchildpresent = as.numeric(Reduce(`|`, lapply(2:15, function(slot) {
      hit <- .data[[paste0("relationship_", slot)]] == "grandchild"
      !is.na(hit) & hit
    })))
  )

#create a variable for child present in general

# childpresent = 1 when any person slot 2-15 is a "child" of any age,
# otherwise 0.
multigen23 <- multigen23 %>%
  dplyr::mutate(
    childpresent = as.numeric(Reduce(`|`, lapply(2:15, function(slot) {
      hit <- .data[[paste0("relationship_", slot)]] == "child"
      !is.na(hit) & hit
    })))
  )




#create a variable for multigenerational household

# multigen = 1 when any of these holds: an adult child/grandchild, a parent
# pointer on the householder row (POPLOC/MOMLOC/POPLOC2/MOMLOC2 != 0), an adult
# other relative, or a grandchild of any age. Otherwise (including when the
# combined test is missing) the flag is 0.
# NOTE(review): unlike the 2021 definition, there is no condition on the
# householder's own age here -- confirm the two years are meant to differ.
multigen23 <- multigen23 %>%
  dplyr::mutate(
    multigen = dplyr::if_else(
      adultchild == 1 | POPLOC != 0 | MOMLOC != 0 | POPLOC2 != 0 |
        MOMLOC2 != 0 | adultrelatives == 1 | grandchildpresent == 1,
      1, 0,
      missing = 0
    )
  )


#create dichotomous variable for whether a parent, their child and their grandchild live in the same household

# threegen = 1 when both a child and a grandchild of the householder are
# present in the household, otherwise 0.
multigen23 <- multigen23 %>%
  dplyr::mutate(
    threegen = dplyr::if_else(
      grandchildpresent == 1 & childpresent == 1, 1, 0,
      missing = 0
    )
  )
#count of all multigenerational households 


# Households flagged as multigenerational, and their ASECWTH total by YEAR.
multigenfilter23 <- multigen23 %>%
  filter(multigen == 1)

agg23 <- with(
  multigenfilter23,
  aggregate(ASECWTH, by = list(YEAR), FUN = sum)
)

agg23
  Group.1        x
1    2023 16767091
# Share of 2023 households flagged multigenerational: the agg23 total divided
# by the aggfull23 total printed above.
16767091/127950630
[1] 0.1310434
#create dataframe with just household ID and multigen tag

# Household-level lookup carrying the multigen flag; HRHHID2.x (the
# householder-side copy) is renamed back to HRHHID2 while selecting.
linking23 <- multigen23 %>%
  select(
    HRHHID, HRHHID2 = HRHHID2.x, HRHHID2.y, CPSID, FAMREL, FAMUNIT, ASECWTH,
    HSEQ, multigen
  )

# Spread the household flag to every person sharing the HRHHID.
# NOTE(review): merging on HRHHID alone duplicates shared column names
# (suffixes .x/.y) and assumes HRHHID is unique per household -- confirm.
all2023 <- merge(linking23, cpsfull23, by = "HRHHID")
Warning in merge.data.frame(linking23, cpsfull23, by = "HRHHID"): column name
'HRHHID2.y' is duplicated in the result
#get agg counts

# Person-weight (ASECWT) totals by multigen flag (0 / 1).
total23 <- with(all2023, aggregate(ASECWT, by = list(multigen), FUN = sum))

total23
  Group.1         x
1       0 257471981
2       1  65575174
# Share of persons living in a multigenerational household in 2023: the
# multigen == 1 total over the sum of both totals printed in total23 above.
(65575174/(257471981+65575174))
[1] 0.2029895
# Household-level lookup with both flags (threegen and multigen), spread to
# every person sharing the HRHHID.
linking2 <- multigen23 %>%
  select(HRHHID, threegen, multigen)

linked <- merge(cpsfull23, linking2, by = "HRHHID")


#get agg counts

# Person-weight (ASECWT) totals by threegen flag (0 / 1).
threegen23 <- with(linked, aggregate(ASECWT, by = list(threegen), FUN = sum))

threegen23
  Group.1         x
1       0 311310551
2       1  11736604
# Share of persons living in a household with both a child and a grandchild of
# the householder: the threegen == 1 total over the sum of both totals above.
(11736604/(11736604+311310551))
[1] 0.03633093
#create variables for stratified characteristics

#AGE: 0-17, 18-24, 25-39, 40-54, 55-64, 65-84, 85+

# Bucket AGE into the reporting bands 0-17, 18-24, 25-39, 40-54, 55-64, 65-84
# and 85+. Ages matching no band (e.g. missing) become NA.
# The "55-64" label previously carried a stray trailing comma ("55-64,"),
# which leaked into every table grouped by agegroups.
linked <- linked %>%
  mutate(agegroups = case_when(
    AGE >= 0 & AGE <= 17 ~ "0-17",
    AGE >= 18 & AGE <= 24 ~ "18-24",
    AGE >= 25 & AGE <= 39 ~ "25-39",
    AGE >= 40 & AGE <= 54 ~ "40-54",
    AGE >= 55 & AGE <= 64 ~ "55-64",
    AGE >= 65 & AGE <= 84 ~ "65-84",
    AGE >= 85 ~ "85+"
  ))

#gender: male vs. female

#race: non-hispanic white, non-hispanic black, asian non-hispanic, Native American/ Pacific Islander

# Combined race/ethnicity label. Any HISPAN value above 0 is labelled
# "Hispanic" regardless of RACE; the single-race labels require HISPAN == 0;
# RACE codes 801-830 are labelled "Multiracial". Unmatched rows become NA.
linked <- linked %>%
  mutate(
    racethnicity = case_when(
      HISPAN > 0 ~ "Hispanic",
      HISPAN == 0 & RACE == 100 ~ "Non-Hispanic White",
      HISPAN == 0 & RACE == 200 ~ "Non-Hispanic Black",
      HISPAN == 0 & RACE == 651 ~ "Asian",
      HISPAN == 0 & (RACE == 652 | RACE == 300) ~
        "American Indian/ Pacific Islander/ Native Hawaiian/ Native Alaskan",
      RACE >= 801 & RACE <= 830 ~ "Multiracial"
    )
  )
    

#native born vs. foreign born

# Native- vs foreign-born status from NATIVITY. In IPUMS CPS, codes 1-4 are
# all native-born persons (1 = both parents native-born, 2-4 = at least one
# foreign-born parent) and 5 is foreign born; 0 (unknown) is left as NA.
# The lower bound was previously `NATIVITY > 1`, which dropped code 1 -- the
# bulk of the native-born population -- into NA.
linked <- linked %>%
  mutate(foreignbornstatus = case_when(
    NATIVITY >= 1 & NATIVITY <= 4 ~ "Native born",
    NATIVITY == 5 ~ "Foreign born"
  ))
#generate aggregate counts for individuals living in a household with a parent, child and grandchild present


# Person-weight (ASECWT) totals by threegen flag and age band.
agegroups23 <- with(
  linked,
  aggregate(ASECWT, by = list(threegen, agegroups), FUN = sum)
)

agegroups23
   Group.1 Group.2           x
1        0    0-17 67420757.49
2        1    0-17  3840654.12
3        0   18-24 28451040.23
4        1   18-24  1405650.37
5        0   25-39 62893335.08
6        1   25-39  2288007.50
7        0   40-54 57479723.73
8        1   40-54  1808376.98
9        0  55-64, 39356061.65
10       1  55-64,  1136243.18
11       0   65-84 49798102.79
12       1   65-84  1185575.28
13       0     85+  5911530.52
14       1     85+    72096.91
# Person-weight (ASECWT) totals by threegen flag and race/ethnicity label.
raceethnicity23 <- with(
  linked,
  aggregate(ASECWT, by = list(threegen, racethnicity), FUN = sum)
)

raceethnicity23
   Group.1                                                            Group.2
1        0 American Indian/ Pacific Islander/ Native Hawaiian/ Native Alaskan
2        1 American Indian/ Pacific Islander/ Native Hawaiian/ Native Alaskan
3        0                                                              Asian
4        1                                                              Asian
5        0                                                           Hispanic
6        1                                                           Hispanic
7        0                                                        Multiracial
8        1                                                        Multiracial
9        0                                                 Non-Hispanic Black
10       1                                                 Non-Hispanic Black
11       0                                                 Non-Hispanic White
12       1                                                 Non-Hispanic White
             x
1    3246009.0
2     334471.1
3   19480222.5
4     443914.1
5   58486376.5
6    3614213.6
7    6593868.3
8     417868.4
9   37517029.5
10   2442575.8
11 185987045.6
12   4483561.4
# Person-weight (ASECWT) totals by threegen flag and nativity status.
foreignbornstatus23 <- with(
  linked,
  aggregate(ASECWT, by = list(threegen, foreignbornstatus), FUN = sum)
)

foreignbornstatus23
  Group.1      Group.2        x
1       0 Foreign born 49663953
2       1 Foreign born  1697979
3       0  Native born 38824417
4       1  Native born  1672085
# Person-weight (ASECWT) totals by threegen flag and SEX code.
sex23 <- with(linked, aggregate(ASECWT, by = list(threegen, SEX), FUN = sum))

sex23
  Group.1 Group.2         x
1       0       1 154382578
2       1       1   5320176
3       0       2 156927974
4       1       2   6416428
#2024 analysis

# Keep the 2024 records, then retain one row per (HRHHID, PERNUM) pair with
# all other columns kept from the first occurrence.
# NOTE(review): HRHHID2 is not part of the de-duplication key -- confirm that
# HRHHID alone identifies a household in this extract.
cps24 <- cps %>%
  filter(YEAR == 2024) %>%
  distinct(HRHHID, PERNUM, .keep_all = TRUE)


#make separate dataframes for householder and household members

# Householder rows: RELATE code 101 is the code this script uses to pick out
# the householder record.
cpshouseholder24 <- cps24 %>%
  filter(RELATE == 101)

# Related household members: child (301), stepchild (303), parent (501),
# grandchild (901) and other relatives (1001).
cpsfamilymembers24 <- cps24 %>%
  filter(
    RELATE == 301 | RELATE == 303 | RELATE == 501 |
      RELATE == 901 | RELATE == 1001
  )

# Person-level file for everyone, restricted to the analysis variables.
cpsfull24 <- cps24 %>%
  select(
    CPSID, ASECWT, ASECWTH, YEAR, HRHHID, HRHHID2, PERNUM, RACE, HISPAN,
    NATIVITY, AGE, SEX, RELATE, FTYPE, FAMUNIT, FAMREL, FAMID, MOMLOC,
    MOMLOC2, POPLOC, POPLOC2, HSEQ
  )


# Total of ASECWTH over householder rows, grouped by YEAR (one row per
# householder, so this is the weighted household count used as denominator).
aggfull24 <- with(
  cpshouseholder24,
  aggregate(ASECWTH, by = list(YEAR), FUN = sum)
)

aggfull24
  Group.1         x
1    2024 129008033
#select variables of interest


# Family-member rows restricted to the analysis variables, with RELATE turned
# into a factor and a readable `relationship` label derived from it. Codes
# outside the recode map become NA.
members24 <- cpsfamilymembers24 %>%
  select(
    YEAR, HRHHID, HRHHID2, PERNUM, CPSID, RACE, HISPAN, NATIVITY, AGE, SEX,
    RELATE, FTYPE, FAMUNIT, FAMREL, FAMID, MOMLOC, MOMLOC2, POPLOC, POPLOC2,
    ASECWT, ASECWTH, HSEQ
  ) %>%
  dplyr::filter(RELATE < 9900) %>%
  mutate(
    RELATE = as.factor(RELATE),
    relationship = recode(
      RELATE,
      "301" = "child",
      "303" = "stepchild",
      "501" = "parent",
      "901" = "grandchild",
      "1001" = "other relatives",
      .default = NA_character_
    )
  )


#pivot wider

# One row per household (HRHHID, HRHHID2) with a set of columns per person
# slot: relationship_<PERNUM>, AGE_<PERNUM>, NATIVITY_<PERNUM>, ...
memberswide24 <- pivot_wider(
  members24,
  id_cols = c(HRHHID, HRHHID2),
  names_from = PERNUM,
  values_from = c(relationship, AGE, NATIVITY, RACE, HISPAN)
)

# Householder rows restricted to the analysis variables.
base24 <- cpshouseholder24 %>%
  select(
    YEAR, HRHHID, HRHHID2, PERNUM, CPSID, RACE, HISPAN, NATIVITY, AGE, SEX,
    RELATE, FTYPE, FAMUNIT, FAMREL, FAMID, MOMLOC, MOMLOC2, POPLOC, POPLOC2,
    ASECWT, ASECWTH, HSEQ
  )

# Attach the wide family-member columns to each householder. The join key is
# HRHHID only, so HRHHID2 comes through twice as HRHHID2.x / HRHHID2.y.
# NOTE(review): if HRHHID repeats across HRHHID2 values the join fans out and
# distinct() below keeps whichever match comes first -- confirm this is intended.
multigen24 <- base24 %>%
  left_join(memberswide24, by = "HRHHID") %>%
  distinct(HRHHID, PERNUM, .keep_all = TRUE)
#recode household dataframe 

#relationship to household head


#create a variable for adult children

# adultchild = 1 when any person slot 2-15 holds a "child" or "grandchild"
# aged 25 or older, otherwise 0. Missing relationship/age counts as no match.
# NOTE(review): only slots 2-15 are inspected and "stepchild" is not counted --
# confirm both are intended.
multigen24 <- multigen24 %>%
  dplyr::mutate(
    adultchild = as.numeric(Reduce(`|`, lapply(2:15, function(slot) {
      rel <- .data[[paste0("relationship_", slot)]]
      age <- .data[[paste0("AGE_", slot)]]
      hit <- (rel == "child" | rel == "grandchild") & age >= 25
      !is.na(hit) & hit
    })))
  )

#create a variable for other relatives living in household

# adultrelatives = 1 when any person slot 2-15 holds an "other relatives"
# member aged 25 or older, otherwise 0. The double-space spelling is kept as an
# accepted label exactly as in the original comparison list.
multigen24 <- multigen24 %>%
  dplyr::mutate(
    adultrelatives = as.numeric(Reduce(`|`, lapply(2:15, function(slot) {
      rel <- .data[[paste0("relationship_", slot)]]
      age <- .data[[paste0("AGE_", slot)]]
      hit <- (rel == "other relatives" | rel == "other  relatives") & age >= 25
      !is.na(hit) & hit
    })))
  )


# Flag households with at least one member recorded as "other relatives" who
# is older than 25 (1 = yes, 0 = no).
#
# Each relationship_<k> column is paired with the AGE_<k> column that has the
# same suffix, and the test is done column-wise. Row-wise apply() is avoided on
# purpose: apply() converts the data frame to a character matrix, so the age
# test was evaluated as a comparison of formatted text rather than of numbers.
# Rows where relationship or age is missing count as "no match", so the result
# is always 0/1 like the other household flags in this file.
#
# NOTE(review): despite the variable name, this tests for *any* qualifying
# relative rather than two or more, and it uses > 25 while the other adult
# flags in this file use >= 25 -- confirm which definition is intended.

multigen24$multipleadultrelatives <- local({
  relationship_cols <- grep("^relationship_", names(multigen24), value = TRUE)
  age_cols <- sub("^relationship_", "AGE_", relationship_cols)
  stopifnot(all(age_cols %in% names(multigen24)))

  hits <- Map(
    function(relationship, age) {
      hit <- relationship == "other relatives" & as.numeric(age) > 25
      !is.na(hit) & hit
    },
    multigen24[relationship_cols],
    multigen24[age_cols]
  )

  # OR the per-member results together; the initial value keeps the length
  # right even if no relationship_<k> columns exist.
  as.numeric(Reduce(`|`, hits, rep(FALSE, nrow(multigen24))))
})


# Flag households in which any of relationship_2 ... relationship_15 equals
# "grandchild", regardless of that member's age (1 = present, 0 = absent).
# Missing relationship values never count as a match.

multigen24 <- multigen24 %>%
  dplyr::mutate(
    grandchildpresent = as.numeric(
      dplyr::if_any(
        dplyr::all_of(paste0("relationship_", 2:15)),
        function(relationship) relationship %in% "grandchild"
      )
    )
  )


# Flag households in which any of relationship_2 ... relationship_15 equals
# "child" (1 = present, 0 = absent). Missing relationship values never count
# as a match.

multigen24 <- multigen24 %>%
  dplyr::mutate(
    childpresent = as.numeric(
      dplyr::if_any(
        dplyr::all_of(paste0("relationship_", 2:15)),
        function(relationship) relationship %in% "child"
      )
    )
  )




# adulthouseholder is 1 when the row's AGE is 25 or older and 0 otherwise
# (a missing AGE also yields 0).
multigen24 <- multigen24 %>%
  dplyr::mutate(
    adulthouseholder = dplyr::if_else(AGE >= 25, 1, 0, missing = 0)
  )



# Classify each row as a multigenerational household (1) or not (0).
#
# adulthouseholder == 1: multigenerational when adultchild, adultrelatives or
#   grandchildpresent is set, or when any parent-location pointer (POPLOC,
#   MOMLOC, POPLOC2, MOMLOC2) is non-zero.
# adulthouseholder == 0: multigenerational when multipleadultrelatives is set,
#   or when a parent-location pointer is non-zero AND adultrelatives is set.
# Anything else, including rows where the inputs are missing, is 0.

multigen24 <- multigen24 %>%
  dplyr::mutate(
    multigen = case_when(
      adulthouseholder == 1 & (
        adultchild == 1 |
          POPLOC != 0 | MOMLOC != 0 | POPLOC2 != 0 | MOMLOC2 != 0 |
          adultrelatives == 1 |
          grandchildpresent == 1
      ) ~ 1,
      adulthouseholder == 0 & (
        multipleadultrelatives == 1 |
          (
            adultrelatives == 1 &
              (POPLOC != 0 | MOMLOC != 0 | POPLOC2 != 0 | MOMLOC2 != 0)
          )
      ) ~ 1,
      TRUE ~ 0
    )
  )



# Weighted count of multigenerational households: keep the rows flagged
# multigen == 1 and sum ASECWTH within each YEAR.

multigenfilter24 <- dplyr::filter(multigen24, multigen == 1)

agg24 <- aggregate(
  x = multigenfilter24[["ASECWTH"]],
  by = list(multigenfilter24[["YEAR"]]),
  FUN = sum
)

agg24
  Group.1        x
1    2024 16349808
# Dichotomous flag for three co-resident generations (a parent, their child and
# their grandchild): 1 when both grandchildpresent and childpresent are set,
# 0 otherwise (including when either flag is missing).

multigen24 <- multigen24 %>%
  dplyr::mutate(
    threegen = dplyr::if_else(
      grandchildpresent == 1 & childpresent == 1,
      1,
      0,
      missing = 0
    )
  )


# Share of households that are multigenerational. The numerator is read from
# agg24 instead of being retyped from its printed output, so it cannot go stale
# when the data or the multigen definition changes.
# NOTE(review): 129008033 is presumably the weighted total of all 2024
# households computed earlier in the file -- replace the literal with that
# object once confirmed.
sum(agg24$x) / 129008033
[1] 0.1267348
# Lookup table taken from multigen24: household identifiers, ASECWTH and the
# multigen flag. HRHHID2.x is renamed to HRHHID2 as part of the selection.

linking24 <- multigen24 %>%
  dplyr::select(
    HRHHID,
    HRHHID2 = HRHHID2.x,
    HRHHID2.y,
    CPSID,
    FAMREL,
    FAMUNIT,
    ASECWTH,
    HSEQ,
    multigen
  )


# Attach the multigen flag to every record of cpsfull24, matching on HRHHID,
# to get the flag at the individual level.
#
# linking24 already carries a column literally named HRHHID2.y (presumably left
# over from an earlier merge). merge() also creates an HRHHID2.y when it
# suffixes the HRHHID2 column present in both inputs, which gave all2024 two
# columns with the same name and triggered the "column name 'HRHHID2.y' is
# duplicated in the result" warning. The leftover column is dropped before
# merging so that every column name in all2024 is unique.

all2024 <- merge(
  linking24[, setdiff(names(linking24), "HRHHID2.y"), drop = FALSE],
  cpsfull24,
  by = "HRHHID"
)
Warning in merge.data.frame(linking24, cpsfull24, by = "HRHHID"): column name
'HRHHID2.y' is duplicated in the result
# Sum of ASECWT for each value of the multigen flag (0 / 1).

total24 <- aggregate(
  x = all2024[["ASECWT"]],
  by = list(all2024[["multigen"]]),
  FUN = sum
)

total24
  Group.1         x
1       0 261631621
2       1  64152396
# Share of the summed ASECWT that falls in multigen == 1 rows, computed from
# total24 rather than from numbers retyped from its printed output.
total24$x[total24$Group.1 == 1] / sum(total24$x)
[1] 0.196917
# Three-generation counts: rebuild the lookup table with HRHHID and both flags,
# then attach it to every record of cpsfull24 by HRHHID to get the flags at the
# individual level.

linking24 <- multigen24 %>%
  dplyr::select(HRHHID, threegen, multigen)

linked24 <- merge(x = cpsfull24, y = linking24, by = "HRHHID")


# Sum of ASECWT for each value of the threegen flag (0 / 1).

threegen24 <- aggregate(
  x = linked24[["ASECWT"]],
  by = list(linked24[["threegen"]]),
  FUN = sum
)

threegen24
  Group.1         x
1       0 314667393
2       1  11116623
# Share of the summed ASECWT that falls in threegen == 1 rows, computed from
# threegen24 rather than from numbers retyped from its printed output.
threegen24$x[threegen24$Group.1 == 1] / sum(threegen24$x)
[1] 0.03412268