Use IPUMS Data from Github
Filter data for only Household Heads
# Restrict the IPUMS extract to rows where relate == 1 (household heads, per
# the heading above), then store family size as a numeric column.
HHO <- subset(ipums, subset = relate == 1)
# NOTE(review): if famsize is a factor, as.numeric() returns level codes,
# not the printed labels -- confirm the column type.
HHO[["famsizenew"]] <- as.numeric(HHO[["famsize"]])
summary(HHO[["famsizenew"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.000 2.000 2.378 3.000 20.000
Create a variable that distinguishes households with a U.S.-born head from households with a foreign-born head, then compare family size between the two groups
# Label each household head by birthplace code: bpl <= 120 is treated as
# U.S.-born, everything else as foreign-born.
# NOTE(review): presumably 120 is the top of the U.S. range in the IPUMS bpl
# coding -- confirm against the codebook.
HHO[["USborn"]] <- ifelse(
  test = HHO[["bpl"]] <= 120,
  yes = "USHead",
  no = "ForeignHead"
)
# Mean family size within each birthplace group.
with(HHO, aggregate(famsizenew, by = list(HHO = USborn), FUN = mean))
## HHO x
## 1 ForeignHead 2.934445
## 2 USHead 2.290301
# Standard deviation of family size within each birthplace group.
with(HHO, aggregate(famsizenew, by = list(HHO = USborn), FUN = sd))
## HHO x
## 1 ForeignHead 1.683525
## 2 USHead 1.332221
# Mean family size for U.S.-born heads.
with(HHO, mean(famsizenew[USborn == "USHead"]))
## [1] 2.290301
# Standard error of that mean: sd / sqrt(n).
with(HHO, {
  us_sizes <- famsizenew[USborn == "USHead"]
  sd(us_sizes) / sqrt(length(us_sizes))
})
## [1] 0.004183421
# Mean family size for foreign-born heads.
with(HHO, mean(famsizenew[USborn == "ForeignHead"]))
## [1] 2.934445
# Standard error of that mean: sd / sqrt(n).
with(HHO, {
  foreign_sizes <- famsizenew[USborn == "ForeignHead"]
  sd(foreign_sizes) / sqrt(length(foreign_sizes))
})
## [1] 0.01335332
Calculate the confidence interval for the mean family size for US born using normal approximation
#
# Two-sided confidence interval for a mean using the normal approximation.
#
# Arguments:
#   data        numeric vector of observations; NA values are ignored.
#   conf.level  confidence level of the interval (default 0.95).
#
# Returns:
#   numeric vector c(lower, upper) = mean -/+ z * standard error.
norm.interval <- function(data, conf.level = 0.95) {
  # Drop missing values once so the n in the standard error is the number of
  # observations actually used for the mean and variance. Dividing by
  # length(data) would count NAs and make the interval too narrow.
  obs <- data[!is.na(data)]
  z <- qnorm((1 - conf.level) / 2, lower.tail = FALSE)
  xbar <- mean(obs)
  sdx <- sqrt(var(obs) / length(obs))
  c(xbar - z * sdx, xbar + z * sdx)
}
# 95% normal-approximation interval for mean family size, U.S.-born heads.
norm.interval(with(HHO, famsizenew[USborn == "USHead"]))
## [1] 2.282102 2.298500
Calculate the confidence interval for the mean family size for Foreign born using normal approximation
#
# Two-sided confidence interval for a mean using the normal approximation.
#
# Arguments:
#   data        numeric vector of observations; NA values are ignored.
#   conf.level  confidence level of the interval (default 0.95).
#
# Returns:
#   numeric vector c(lower, upper) = mean -/+ z * standard error.
norm.interval <- function(data, conf.level = 0.95) {
  # Use only the non-missing observations for every quantity, including the
  # sample size in the standard error. Dividing by length(data) would count
  # NAs and make the interval too narrow.
  obs <- data[!is.na(data)]
  z <- qnorm((1 - conf.level) / 2, lower.tail = FALSE)
  xbar <- mean(obs)
  sdx <- sqrt(var(obs) / length(obs))
  c(xbar - z * sdx, xbar + z * sdx)
}
# 95% normal-approximation interval for mean family size, foreign-born heads.
norm.interval(with(HHO, famsizenew[USborn == "ForeignHead"]))
## [1] 2.908273 2.960617
Do the confidence intervals for the mean family size for USHead and ForeignHead overlap? No, they do not.
Calculate the confidence interval for the mean family size for U.S. Head of Household using the bootstrap.
# Percentile-bootstrap interval for mean family size, U.S.-born heads.
# No seed is set, so the limits change slightly from run to run.

# Build the subset once; it does not change between bootstrap replicates.
us_famsize <- HHO$famsizenew[HHO$USborn == "USHead"]

n.sim <- 1000
mus <- numeric(n.sim)
vars <- numeric(n.sim)
for (i in seq_len(n.sim)) {
  # Resample with replacement, keeping the original sample size.
  dat <- sample(us_famsize, size = length(us_famsize), replace = TRUE)
  mus[i] <- mean(dat, na.rm = TRUE)
  vars[i] <- var(dat, na.rm = TRUE)
}

# Draw both bootstrap distributions side by side, marking the observed value
# in red, then restore the previous plotting layout.
old_par <- par(mfrow = c(1, 2))
hist(mus, freq = FALSE, main = "Bootstrap distribution of means")
abline(v = mean(us_famsize, na.rm = TRUE), col = 2, lwd = 3)
hist(vars, freq = FALSE, main = "Bootstrap distribution of variance")
abline(v = var(us_famsize, na.rm = TRUE), col = 2, lwd = 3)
par(old_par)

# The 2.5% and 97.5% quantiles of the bootstrap means form the 95% interval.
quantile(mus, probs = c(0.025, 0.975))
## 2.5% 97.5%
## 2.281783 2.298715
Calculate the confidence interval for the mean family size for Foreign head of household using the bootstrap.
# Percentile-bootstrap interval for mean family size, foreign-born heads.
# No seed is set, so the limits change slightly from run to run.

# Build the subset once; it does not change between bootstrap replicates.
foreign_famsize <- HHO$famsizenew[HHO$USborn == "ForeignHead"]

n.sim <- 1000
mus <- numeric(n.sim)
vars <- numeric(n.sim)
for (i in seq_len(n.sim)) {
  # Resample with replacement, keeping the original sample size.
  dat <- sample(foreign_famsize, size = length(foreign_famsize), replace = TRUE)
  mus[i] <- mean(dat, na.rm = TRUE)
  vars[i] <- var(dat, na.rm = TRUE)
}

# Draw both bootstrap distributions side by side, marking the observed value
# in red, then restore the previous plotting layout.
old_par <- par(mfrow = c(1, 2))
hist(mus, freq = FALSE, main = "Bootstrap distribution of means")
abline(v = mean(foreign_famsize, na.rm = TRUE), col = 2, lwd = 3)
hist(vars, freq = FALSE, main = "Bootstrap distribution of variance")
abline(v = var(foreign_famsize, na.rm = TRUE), col = 2, lwd = 3)
par(old_par)

# The 2.5% and 97.5% quantiles of the bootstrap means form the 95% interval.
quantile(mus, probs = c(0.025, 0.975))
## 2.5% 97.5%
## 2.907265 2.960874
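How do the results from the normal approximation method compare with those from the bootstrap method? For U.S.-born heads of household, the normal approximation interval (2.2821 to 2.2985) is nearly identical to the bootstrap interval printed above (2.2818 to 2.2987); in this run the bootstrap interval is marginally wider. For foreign-born heads of household, the normal approximation interval (2.9083 to 2.9606) is likewise very similar to the bootstrap interval (2.9073 to 2.9609). Because the bootstrap relies on random resampling, its limits change slightly from run to run.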