generate_proportion_draws

#
# generate_proportion_draws_example.R
#
# Reed Sorensen
# December 2017
#

reported_p <- 0.05
reported_se <- 0.03

# using random normal distribution makes part of the 
# probability distrubution go below zero (no-no)

draws1 <- rnorm(
  n = 100000, 
  mean = reported_p, 
  sd = reported_se
)

hist(draws1, breaks = 50, main = "Don't do this when taking draws from the distribution of a proportion")
abline(v = 0, lwd = 3, col = "red")

# first we have to transform 'p' and 'se' to logit space
# so the result it bounded by 0 and 1

logit_p <- log(reported_p / (1-reported_p))

# logit_se = squared root of [variance of logit(reported_p)]
# based on line 57 of: https://github.com/ihmeuw/ihme-modeling/blob/master/nonfatal_code/hiv/stgpr/03_prep_gpr.py 
# -- oddly Google found the reference I was looking for in the IHME GitHub repository #smallworld
logit_se <- sqrt(reported_se^2 / (reported_p*(1-reported_p))^2)

# -- it's normally distributed in logit space
draws2 <- rnorm(
  n = 100000, 
  mean = logit_p, 
  sd = logit_se
)

hist(draws2, breaks = 50, main = "Draws are normally distributed in logit space")

# -- and bounded by 0 and 1 in linear space

inverse_logit <- function(x) exp(x) / (1+exp(x))
hist(inverse_logit(draws2), breaks = 50, main = "Draws are bounded by 0 and 1 after inverse logit transformation")

# here's a function that implements the process

generate_p_draws <- function(p, se, n_draws) {
  logit_p <- log(p / (1-p) )
  logit_se <- sqrt(se^2 / (p*(1-p))^2 )
  draws <- rnorm(
    n = n_draws, 
    mean = logit_p, 
    sd = logit_se
  )
  out <- exp(draws) / (1+exp(draws))
  return(out)
}

# -- check that it gives the same result
draws3 <- generate_p_draws(p = 0.05, se = 0.03, n_draws = 100000)
hist(draws3, breaks = 50, main = "Check that the function gives the same result... yep")

generate_proportion_draws_example.R

rsoren

Sat Dec 02 18:28:25 2017