Use IPUMS data from GitHub

Filter data for only Household Heads

# Keep only the household-head records (rows whose `relate` equals 1);
# rows where the comparison is NA are dropped, as subset() would do.
HHO <- ipums[which(ipums$relate == 1), , drop = FALSE]

# Numeric copy of family size used by every summary below.
# NOTE(review): assumes `famsize` is not a factor (as.numeric() would then
# return level codes) - confirm against how `ipums` was loaded.
HHO[["famsizenew"]] <- as.numeric(HHO[["famsize"]])
summary(HHO[["famsizenew"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.000   2.000   2.378   3.000  20.000

Create a variable distinguishing families with foreign-born and U.S.-born household heads

# Label each household head by nativity: birthplace codes up to 120 are
# labelled "USHead", everything else "ForeignHead".
# NOTE(review): presumably 120 is the last U.S. code in the IPUMS `bpl`
# scheme - confirm against the codebook.
HHO$USborn <- with(HHO, ifelse(bpl <= 120, "USHead", "ForeignHead"))

# Mean family size within each nativity group.
aggregate(x = HHO$famsizenew, by = list(HHO = HHO$USborn), FUN = mean)
##           HHO        x
## 1 ForeignHead 2.934445
## 2      USHead 2.290301
# Standard deviation of family size within each nativity group.
aggregate(x = HHO$famsizenew, by = list(HHO = HHO$USborn), FUN = sd)
##           HHO        x
## 1 ForeignHead 1.683525
## 2      USHead 1.332221
# Mean family size for U.S.-born household heads.
with(HHO, mean(famsizenew[USborn == "USHead"]))
## [1] 2.290301
# Standard error of that mean: sd / sqrt(n).
with(HHO, {
  sizes <- famsizenew[USborn == "USHead"]
  sd(sizes) / sqrt(length(sizes))
})
## [1] 0.004183421
# Mean family size for foreign-born household heads.
with(HHO, mean(famsizenew[USborn == "ForeignHead"]))
## [1] 2.934445
# Standard error of that mean: sd / sqrt(n).
with(HHO, {
  sizes <- famsizenew[USborn == "ForeignHead"]
  sd(sizes) / sqrt(length(sizes))
})
## [1] 0.01335332

Calculate the confidence interval for the mean family size for US born using normal approximation

#
# Two-sided normal-approximation confidence interval for a mean.
#
# Args:
#   data:       numeric vector of observations; missing values are ignored.
#   conf.level: confidence level of the interval (default 0.95).
#
# Returns:
#   Numeric vector of length 2: c(lower bound, upper bound).
norm.interval <- function(data, conf.level = 0.95) {
  # Drop missing values once, so that the mean, the variance and the sample
  # size all describe the same observations. Dividing by length(data) while
  # computing the variance with NAs removed would overstate n and make the
  # interval too narrow.
  observed <- data[!is.na(data)]

  # Upper-tail standard normal quantile for the requested confidence level.
  z <- qnorm((1 - conf.level) / 2, lower.tail = FALSE)

  xbar <- mean(observed)
  std_err <- sqrt(var(observed) / length(observed))

  c(xbar - z * std_err, xbar + z * std_err)
}

# 95% normal-approximation interval for U.S.-born household heads.
with(HHO, norm.interval(famsizenew[USborn == "USHead"]))
## [1] 2.282102 2.298500

Calculate the confidence interval for the mean family size for Foreign born using normal approximation

#
# Two-sided normal-approximation confidence interval for a mean.
#
# Args:
#   data:       numeric vector of observations; missing values are ignored.
#   conf.level: confidence level of the interval (default 0.95).
#
# Returns:
#   Numeric vector of length 2: c(lower bound, upper bound).
norm.interval <- function(data, conf.level = 0.95) {
  # Drop missing values once, so that the mean, the variance and the sample
  # size all describe the same observations. Dividing by length(data) while
  # computing the variance with NAs removed would overstate n and make the
  # interval too narrow.
  observed <- data[!is.na(data)]

  # Upper-tail standard normal quantile for the requested confidence level.
  z <- qnorm((1 - conf.level) / 2, lower.tail = FALSE)

  xbar <- mean(observed)
  std_err <- sqrt(var(observed) / length(observed))

  c(xbar - z * std_err, xbar + z * std_err)
}

# 95% normal-approximation interval for foreign-born household heads.
with(HHO, norm.interval(famsizenew[USborn == "ForeignHead"]))
## [1] 2.908273 2.960617

Do the confidence intervals for the mean family size for USHead and ForeignHead overlap? No, they do not.

Calculate the confidence interval for the mean family size for U.S. Head of Household using the bootstrap.

# Nonparametric bootstrap of family size for U.S.-born household heads:
# resample the observed values with replacement n.sim times and record the
# mean and variance of every resample.
# No seed is set in this block, so results vary from run to run.
n.sim <- 1000

# The group's observations do not change between draws, so extract them
# once instead of re-subsetting HHO on every iteration.
us_famsize <- HHO$famsizenew[HHO$USborn == "USHead"]

# Preallocated result vectors: one bootstrap mean / variance per replicate.
mus <- numeric(n.sim)
vars <- numeric(n.sim)
for (i in seq_len(n.sim)) {
  dat <- sample(us_famsize, size = length(us_famsize), replace = TRUE)
  mus[i] <- mean(dat, na.rm = TRUE)
  vars[i] <- var(dat, na.rm = TRUE)
}


# Show the bootstrap distributions of the mean and the variance side by
# side; the vertical line in each panel marks the statistic computed on the
# observed U.S.-born sample.
par(mfrow = c(1, 2))
hist(mus, freq = FALSE, main = "Bootstrap distribution of means")
abline(
  v = mean(HHO$famsizenew[HHO$USborn == "USHead"], na.rm = TRUE),
  col = 2, lwd = 3
)

hist(vars, freq = FALSE, main = "Bootstrap distribution of variance")
abline(
  v = var(HHO$famsizenew[HHO$USborn == "USHead"], na.rm = TRUE),
  col = 2, lwd = 3
)

# Percentile bootstrap 95% interval for the mean. `probs` is spelled out
# rather than relying on partial matching of `p`.
quantile(mus, probs = c(0.025, 0.975))
##     2.5%    97.5% 
## 2.281783 2.298715

Calculate the confidence interval for the mean family size for Foreign head of household using the bootstrap.

# Nonparametric bootstrap of family size for foreign-born household heads:
# resample the observed values with replacement n.sim times and record the
# mean and variance of every resample.
# No seed is set in this block, so results vary from run to run.
n.sim <- 1000

# The group's observations do not change between draws, so extract them
# once instead of re-subsetting HHO on every iteration.
foreign_famsize <- HHO$famsizenew[HHO$USborn == "ForeignHead"]

# Preallocated result vectors: one bootstrap mean / variance per replicate.
mus <- numeric(n.sim)
vars <- numeric(n.sim)
for (i in seq_len(n.sim)) {
  dat <- sample(foreign_famsize, size = length(foreign_famsize), replace = TRUE)
  mus[i] <- mean(dat, na.rm = TRUE)
  vars[i] <- var(dat, na.rm = TRUE)
}


# Show the bootstrap distributions of the mean and the variance side by
# side; the vertical line in each panel marks the statistic computed on the
# observed foreign-born sample.
par(mfrow = c(1, 2))
hist(mus, freq = FALSE, main = "Bootstrap distribution of means")
abline(
  v = mean(HHO$famsizenew[HHO$USborn == "ForeignHead"], na.rm = TRUE),
  col = 2, lwd = 3
)

hist(vars, freq = FALSE, main = "Bootstrap distribution of variance")
abline(
  v = var(HHO$famsizenew[HHO$USborn == "ForeignHead"], na.rm = TRUE),
  col = 2, lwd = 3
)

# Percentile bootstrap 95% interval for the mean. `probs` is spelled out
# rather than relying on partial matching of `p`.
quantile(mus, probs = c(0.025, 0.975))
##     2.5%    97.5% 
## 2.907265 2.960874

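How do the results from the normal approximation method compare with those from the bootstrap method? For U.S. Head of Household, the normal approximation interval (2.2821 to 2.2985) is nearly identical to the bootstrap interval shown above (2.2818 to 2.2987), which is marginally wider. For Foreign Head of Household, the normal approximation interval (2.9083 to 2.9606) is likewise very similar to the bootstrap interval (2.9073 to 2.9609). Because the bootstrap resamples at random, its endpoints change slightly from run to run.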