1) First things first, load the libraries, add a function for Standard Error (mySE) and load ipums data:

library(haven)
library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
mySE<-function(x){sd(x,na.rm=T)/length(x)}
ipums<-read_dta("https://github.com/coreysparks/data/blob/master/usa_00045.dta?raw=true")

Filter to only have household heads by creating a new variable: onlyheads

onlyheads<-subset(ipums, relate==1)

2) Average, SD, Standard Error of Family Size, Head inside U.S. / outside U.S. born

onlyheads<-ipums%>%
  mutate(bornus= case_when(ipums$bpl %in% c(1:120)~"US Born",ipums$bpl %in% c(120:998)~"Foreign Born"))%>%
  filter(relate==1)

onlyheads%>%
  group_by(bornus) %>%
  summarise(mean(famsize), sd(famsize), mySE(famsize))
## # A tibble: 2 x 4
##         bornus `mean(famsize)` `sd(famsize)` `mySE(famsize)`
##          <chr>           <dbl>         <dbl>           <dbl>
## 1 Foreign Born        2.934445      1.683525    1.059154e-04
## 2      US Born        2.290301      1.332221    1.313672e-05

3)t-Test

US Born:

    t.test(onlyheads$famsize[onlyheads$bornus=="US Born"])
## 
##  One Sample t-test
## 
## data:  onlyheads$famsize[onlyheads$bornus == "US Born"]
## t = 547.47, df = 101410, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  2.282101 2.298500
## sample estimates:
## mean of x 
##  2.290301

Foreign Born:

    t.test(onlyheads$famsize[onlyheads$bornus=="Foreign Born"])
## 
##  One Sample t-test
## 
## data:  onlyheads$famsize[onlyheads$bornus == "Foreign Born"]
## t = 219.75, df = 15894, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  2.908271 2.960619
## sample estimates:
## mean of x 
##  2.934445

4) Confidence Interval using Bootsrap

US Born:

n.sim<-1000
mus<-numeric(n.sim)
OUTvars<-numeric(n.sim)
for (i in 1:n.sim){  
     dat<-sample(onlyheads$famsize[onlyheads$bornus=="US Born"],size=length(onlyheads$famsize[onlyheads$bornus=="US Born"]), replace=T)
     mus[i]<-mean(dat, na.rm=T)
}

quantile(mus, p=c(.025,.975))
##     2.5%    97.5% 
## 2.282304 2.298013
# or t.test(dat)

Foreign Born:

n.sim<-1000
mus<-numeric(n.sim)
OUTvars<-numeric(n.sim)
for (i in 1:n.sim){  
     dat<-sample(onlyheads$famsize[onlyheads$bornus=="Foreign Born"],size=length(onlyheads$famsize[onlyheads$bornus=="Foreign Born"]), replace=T)
     mus[i]<-mean(dat, na.rm=T)
}

quantile(mus, p=c(.025,.975))
##     2.5%    97.5% 
## 2.909446 2.962631
# or t.test(dat)

The result should be very similar (I’m to 95 percent confident).