1. Using IPUMS Data

library(dplyr) #to manipulate data

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2) #to visualize data

## Loading Data
library(haven)
# Read the IPUMS extract (a Stata .dta file) directly from GitHub;
# `?raw=true` requests the file itself rather than the GitHub web page.
ipums <- read_dta(
  file = "https://github.com/coreysparks/data/blob/master/usa_00045.dta?raw=true"
)

2. Standard Error for the Mean Family Size, by BirthPlace of Household Heads

# Keep household heads only (relate == 1), label each head by birthplace
# (bpl codes up to 120 are treated as US born), then report the mean,
# standard deviation and standard error of family size per group.
ipums %>%
  filter(relate == 1) %>%
  mutate(BirthPlace = ifelse(bpl > 120, "Foreign Born", "US Born")) %>%
  group_by(BirthPlace) %>%
  summarise(
    mean_familysize = mean(famsize),
    sd = sd(famsize),
    # n() is the number of rows in the current group
    standard_error = sd(famsize) / sqrt(n())
  )

## # A tibble: 2 x 4
##     BirthPlace mean_familysize       sd standard_error
##          <chr>           <dbl>    <dbl>          <dbl>
## 1 Foreign Born        2.934445 1.683525    0.013353324
## 2      US Born        2.290301 1.332221    0.004183421

3. Confidence Intervals for the Mean Family Size, by BirthPlace of Household Heads

Loading the function to compute a confidence interval

# Normal-approximation confidence interval for the mean of `data`.
#
# Arguments:
#   data        numeric vector; missing values are ignored.
#   conf.level  confidence level of the interval (default 0.95).
#
# Returns a length-2 numeric vector: c(lower, upper).
norm.interval <- function(data, conf.level = 0.95) {
  # Two-sided critical value of the standard Normal distribution.
  z <- qnorm((1 - conf.level) / 2, lower.tail = FALSE)

  # Count only non-missing observations: mean() and var() below drop NAs,
  # so dividing by length(data) would understate the standard error.
  n <- sum(!is.na(data))
  xbar <- mean(data, na.rm = TRUE)
  se <- sqrt(var(data, na.rm = TRUE) / n)

  c(xbar - z * se, xbar + z * se)
}

norm.interval(ipums$famsize)

## [1] 3.043024 3.055395

Calculating Confidence Intervals for the Mean Family Size for Families of Foreign Born and US Born Household Heads

# Household heads (relate == 1) whose birthplace code is above 120.
foreign_born <- ipums %>%
  filter(relate == 1) %>%
  mutate(birthplace = ifelse(bpl > 120, "FOREIGN_BORN", "US_BORN")) %>%
  filter(birthplace == "FOREIGN_BORN")

# Household heads (relate == 1) whose birthplace code is 120 or below.
us_born <- ipums %>%
  filter(relate == 1) %>%
  mutate(birthplace = ifelse(bpl > 120, "FOREIGN_BORN", "US_BORN")) %>%
  filter(birthplace == "US_BORN")

norm.interval(foreign_born$famsize) #confidence intervals for foreign born

## [1] 2.908273 2.960617

norm.interval(us_born$famsize) #confidence intervals for us born

## [1] 2.282102 2.298500

The 95% confidence interval for the mean family size is (2.908273, 2.960617) for Foreign Born household heads and (2.282102, 2.298500) for US Born household heads. Hence, the two intervals do not overlap.

4. Calculating Confidence Intervals for the Mean Family Size for Households by BirthPlace using the Bootstrap Method

For Foreign Born Household Heads

# Number of bootstrap resamples.
n.sim <- 1000

# Pre-allocated storage for the mean and variance of each resample.
foreign_mus <- numeric(n.sim)
foreign_vars <- numeric(n.sim)

# NOTE: no RNG seed is set in this chunk, so results vary between runs.
for (i in seq_len(n.sim)) {
  # Resample family sizes with replacement, same size as the original sample.
  dat <- sample(foreign_born$famsize,
                size = length(foreign_born$famsize), replace = TRUE)
  foreign_mus[i] <- mean(dat, na.rm = TRUE)
  foreign_vars[i] <- var(dat, na.rm = TRUE)
}

# Side-by-side histograms of the bootstrap means and variances; the vertical
# line on each marks the same statistic computed on the observed sample.
par(mfrow = c(1, 2))
hist(foreign_mus, freq = FALSE,
     main = "Bootstrap distribution of\nforeign born famsize means")
abline(v = mean(foreign_born$famsize, na.rm = TRUE), col = 2, lwd = 3)

hist(foreign_vars, freq = FALSE,
     main = "Bootstrap distribution of\nforeign born famsize variance")
abline(v = var(foreign_born$famsize, na.rm = TRUE), col = 2, lwd = 3)

Bootstrap confidence intervals using percentile method for Foreign Born Means

# 95% percentile interval: 2.5th and 97.5th percentiles of the bootstrap means.
quantile(foreign_mus, probs = c(.025, .975))

##     2.5%    97.5% 
## 2.910346 2.960503

For US Born Household Heads

# Number of bootstrap resamples.
n.sim <- 1000

# Pre-allocated storage for the mean and variance of each resample.
us_mus <- numeric(n.sim)
us_vars <- numeric(n.sim)

# NOTE: no RNG seed is set in this chunk, so results vary between runs.
for (i in seq_len(n.sim)) {
  # Resample family sizes with replacement, same size as the original sample.
  dat <- sample(us_born$famsize,
                size = length(us_born$famsize), replace = TRUE)
  us_mus[i] <- mean(dat, na.rm = TRUE)
  us_vars[i] <- var(dat, na.rm = TRUE)
}

# Side-by-side histograms of the bootstrap means and variances; the vertical
# line on each marks the same statistic computed on the observed sample.
par(mfrow = c(1, 2))
hist(us_mus, freq = FALSE,
     main = "Bootstrap distribution of\nUS born famsize means")
abline(v = mean(us_born$famsize, na.rm = TRUE), col = 2, lwd = 3)

# This panel shows the variances, so it is titled accordingly.
hist(us_vars, freq = FALSE,
     main = "Bootstrap distribution of\nUS born famsize variance")
abline(v = var(us_born$famsize, na.rm = TRUE), col = 2, lwd = 3)

Bootstrap confidence intervals using percentile method for US Born Means

# 95% percentile interval: 2.5th and 97.5th percentiles of the bootstrap means.
quantile(us_mus, probs = c(.025, .975))

##     2.5%    97.5% 
## 2.282194 2.298930

Comparing the confidence intervals

for foreign born

norm.interval(foreign_born$famsize)

## [1] 2.908273 2.960617

# Bootstrap percentile interval, for comparison with the Normal interval.
quantile(foreign_mus, probs = c(.025, .975))

##     2.5%    97.5% 
## 2.910346 2.960503

for US born

norm.interval(us_born$famsize)

## [1] 2.282102 2.298500

# Bootstrap percentile interval, for comparison with the Normal interval.
quantile(us_mus, probs = c(.025, .975))

##     2.5%    97.5% 
## 2.282194 2.298930

The comparison shows that, for each group, the bootstrap percentile interval is very close to the Normal-approximation interval, and the bootstrap intervals for the two groups do not overlap either.

DEM 7273 - Homework 4

Muntasir

September 18, 2017

1. Using IPUMS Data

2. Standard Error for the Mean Family Size, by BirthPlace of Household Heads

3. Confidence Intervals for the Mean Family Size, by BirthPlace of Household Heads

Loading the function to compute a confidence interval

Calculating Confidence Intervals for the Mean Family Size for Families of Foreign Born and US Born Household Heads

The 95% confidence interval for the mean family size is (2.908273, 2.960617) for Foreign Born household heads and (2.282102, 2.298500) for US Born household heads. Hence, the two intervals do not overlap.

4. Calculating Confidence Intervals for the Mean Family Size for Households by BirthPlace using the Bootstrap Method

For Foreign Born Household Heads

Bootstrap confidence intervals using percentile method for Foreign Born Means

For US Born Household Heads

Bootstrap confidence intervals using percentile method for US Born Means

Comparing the confidence intervals

for foreign born

for US born

The comparison shows that, for each group, the bootstrap percentile interval is very close to the Normal-approximation interval, and the bootstrap intervals for the two groups do not overlap either.