Source file ⇒ Econ142PS1-1.Rmd
Student ID: 25413550

Question #1

Set up an R program to conduct a simulation of drawing a sample size of n from a Bernoulli distribution with mean p.In each “replication” r you will draw a sample, construct the estimated mean from that replication, and calculate the 95% confidence interval where s is the estimated standard deviation on that replication. Find: a) the mean estimate of p b) the mean estimate of the true standard deviation c) the fraction of time that the confidence interval contains the true p. This is called the “coverage rate”

#Question 1 part a) and b)
set.seed(1)

# n=30, p=0.05
w = rbinom(30,1,0.05)
w1000 = replicate(1000, w)

w1000samplemean <- mean(apply(w1000,2,mean))
w1000samplesd <- mean(apply(w1000,2,sd))

# n=30, p = 0.25
x = rbinom(30,1,0.25)
x1000 = replicate(1000, x)

x1000samplemean <- mean(apply(x1000,2,mean))
x1000samplesd <- mean(apply(x1000,2,sd))

# n=60, p=0.05
y = rbinom(60, 1,0.05)
y1000 = replicate(1000, y)

y1000samplemean <- mean(apply(y1000,2,mean))
y1000samplesd <- mean(apply(y1000,2,sd))

# n=60, 0=0.25
z = rbinom(60,1,0.25)
z1000 = replicate(1000, z)

z1000samplemean <- mean(apply(z1000,2,mean))
z1000samplesd <- mean(apply(z1000,2,sd))

# create table of est. mean & est. SD
wtab <- matrix(c(w1000samplemean,w1000samplesd),1,2)
colnames(wtab) <- c("Estimated Mean", "Estimated SD")
rownames(wtab) <- c("n=30, p=0.05")

xtab <- matrix(c(x1000samplemean,x1000samplesd),1,2)
colnames(xtab) <- c("Estimated Mean", "Estimated SD")
rownames(xtab) <- c("n=30, p=0.25")

ytab <- matrix(c(y1000samplemean,y1000samplesd),1,2)
colnames(ytab) <- c("Estimated Mean", "Estimated SD")
rownames(ytab) <- c("n=60, p=0.05")

ztab <- matrix(c(z1000samplemean,z1000samplesd),1,2)
colnames(ztab) <- c("Estimated Mean", "Estimated SD")
rownames(ztab) <- c("n=60, p=0.25")

table1 <- rbind(wtab,xtab,ytab,ztab)

Question 1 parts a) & b):

##              Estimated Mean Estimated SD
## n=30, p=0.05     0.03333333    0.1825742
## n=30, p=0.25     0.20000000    0.4068381
## n=60, p=0.05     0.05000000    0.2197842
## n=60, p=0.25     0.20000000    0.4033756
#Question 1 part c)

## w; n=30, p=0.05
## x; n=30, p=0.25
## y; n=60, p=0.05
## z; n=60, p=0.25

percentInCI <- 
  function(num_reps, n, p){
    # populate array
    s_mean <-rep(0,num_reps)
    CIlower <- rep(0,num_reps)
    CIupper <- rep(0,num_reps)
    inCI <- rep(0,num_reps)
    
    # loop for trials
    for(i in 1:num_reps){
      potato <- rbinom(n,1,p)
      s_mean[i] <- mean(potato)
      CIlower[i] <-s_mean[i]-1.96*(sqrt(s_mean[i]*(1-s_mean[i]))/sqrt(n))
      CIupper[i] <-s_mean[i]+1.96*(sqrt(s_mean[i]*(1-s_mean[i]))/sqrt(n)) 
      
      if( p>CIlower[i] & p < CIupper[i]){
        inCI[i] <-1
        }
      else {
        inCI[i] <-0
      }
      }
    percentIn <- sum(inCI)/num_reps
  }

wpercent <- percentInCI(1000,30,0.05)
xpercent <- percentInCI(1000,30,0.25)
ypercent <- percentInCI(1000,60,0.05)
zpercent <- percentInCI(1000,60,0.25)

CItable <- matrix(c(wpercent,xpercent,ypercent,zpercent))
colnames(CItable) <- c("% in 95% CI")
rownames(CItable) <- c("w; n=30, p=0.05","x; n=30, p=0.25","y; n=60, p=0.05","z; n=60, p=0.25")

Question 1 Part c):

##                 % in 95% CI
## w; n=30, p=0.05       0.776
## x; n=30, p=0.25       0.941
## y; n=60, p=0.05       0.779
## z; n=60, p=0.25       0.943

Yes, I agree that n of 30 or larger is enough to ensure that asymptotic CIs work well. It is shown by the table above that n=30 yields similar to n=60.