First things first, load the ipums data:

library(haven)
library(readr)
# Read the Stata (.dta) extract straight from its GitHub URL into `ipums`.
ipums <- read_dta(
  file = "https://github.com/coreysparks/data/blob/master/usa_00045.dta?raw=true"
)

1) Filter the data to household heads only, storing the result in a new object: onlyheads

# Keep only the rows whose `relate` code is 1 (household heads). which() skips
# rows where the comparison is NA, so the same rows are kept as with subset().
onlyheads <- ipums[which(ipums$relate == 1), ]

Alternatively, apply the same filter inside a pipeline, as in 2).

2) Histogram of Family Size

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
# Histogram of family size for household heads (relate == 1).
ipums %>%
  filter(relate == 1) %>%
  # famsize comes in as a haven "labelled" vector; converting it to plain
  # numeric lets ggplot2 choose a continuous scale without a warning.
  mutate(famsize = as.numeric(famsize)) %>%
  ggplot() +
  # Family size is a whole-number count, so use one bin per value rather than
  # the default of 30 bins.
  geom_histogram(aes(famsize), binwidth = 1)
## Don't know how to automatically pick scale for object of type labelled. Defaulting to continuous.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

3) Average Family Size by Whether the Household Head Was Born Inside or Outside the U.S.

Mean and standard deviation of family size for each group:

# Mean and standard deviation of family size among household heads
# (relate == 1), split by the birthplace group of the head.
ipums %>%
  filter(relate == 1) %>%
  mutate(
    # Refer to the piped data's own `bpl` column (not `ipums$bpl`) so the
    # classification stays aligned with the rows that survive the filter.
    # The two code ranges no longer overlap: 120 stays "US Born", exactly as
    # the first-match rule of case_when() classified it before. Codes outside
    # 1..998 become NA.
    bornus = case_when(
      bpl %in% 1:120 ~ "US Born",
      bpl %in% 121:998 ~ "Foreign Born"
    )
  ) %>%
  group_by(bornus) %>%
  summarise(mean(famsize), sd(famsize))
## # A tibble: 2 x 3
##         bornus `mean(famsize)` `sd(famsize)`
##          <chr>           <dbl>         <dbl>
## 1 Foreign Born        2.934445      1.683525
## 2      US Born        2.290301      1.332221

4) Histogram, Box and Whisker Plot

Histogram

# Histogram of family size for household heads (relate == 1), one panel per
# birthplace group.
ipums %>%
  filter(relate == 1) %>%
  mutate(
    # Non-overlapping birthplace code ranges; 120 remains "US Born" as before.
    bornus = case_when(
      bpl %in% 1:120 ~ "US Born",
      bpl %in% 121:998 ~ "Foreign Born"
    ),
    # Strip the haven labels so ggplot2 treats famsize as an ordinary number.
    famsize = as.numeric(famsize)
  ) %>%
  ggplot() +
  geom_histogram(mapping = aes(famsize), binwidth = .5) +
  # famsize is continuous, so place the tick marks with a continuous scale
  # instead of forcing a discrete scale onto numeric data. The scale name is
  # the x-axis label, so no separate xlab() is needed.
  scale_x_continuous(
    "number of family members",
    breaks = seq(0, 20, by = 2)
  ) +
  facet_wrap(~bornus) +
  ggtitle("Family Size by Birth Place") +
  ylab("number of households")

Box Plot

# Box-and-whisker plot of family size for household heads (relate == 1),
# one box per birthplace group.
ipums %>%
  filter(relate == 1) %>%
  mutate(
    # Non-overlapping birthplace code ranges; 120 remains "US Born" as before.
    bornus = case_when(
      bpl %in% 1:120 ~ "US Born",
      bpl %in% 121:998 ~ "Foreign Born"
    ),
    # Strip the haven labels so ggplot2 treats famsize as an ordinary number.
    famsize = as.numeric(famsize)
  ) %>%
  ggplot() +
  # ymin/ymax are computed by the boxplot statistic. Passing them as constants
  # would pin every whisker to the same fixed values, so they are left to the
  # stat here.
  geom_boxplot(aes(bornus, famsize)) +
  ggtitle("Family Size by Birth Place") +
  xlab("head by birthplace") +
  ylab("number of household members")
## Don't know how to automatically pick scale for object of type labelled. Defaulting to continuous.

5) Average Size by Age

# Scatter plot of the mean family size at each age of the household head
# (relate == 1), with a loess smoother drawn over the points.
ipums %>%
  filter(relate == 1) %>%
  # Convert to plain numerics up front so that grouping, averaging and the
  # continuous x scale all work on ordinary numbers (as.numeric() is a no-op
  # if a column is already numeric).
  mutate(age = as.numeric(age), famsize = as.numeric(famsize)) %>%
  group_by(age) %>%
  summarise(avsize = mean(famsize)) %>%
  # Both layers share the same mapping, so declare it once.
  ggplot(aes(age, avsize)) +
  geom_point(size = .9) +
  # State the smoother explicitly instead of relying on the automatic choice.
  geom_smooth(method = "loess", formula = y ~ x) +
  # Age is continuous: set tick positions with a continuous scale. The scale
  # name is the x-axis label.
  scale_x_continuous("Age", breaks = seq(15, 105, by = 5)) +
  ggtitle("Average Family Size by Age of Household Head") +
  ylab("average number of household members")
## `geom_smooth()` using method = 'loess'