slides

library(ggplot2)

df <- data.frame(theta = seq(0, 1, by = 0.01))
df <- transform(df, posterior = dbeta(theta, 11, 4))

ggplot(df, aes(x = theta, y = posterior)) +
  geom_line()

#ggsave('beta_distribution.png')

theta <- seq(0.01, 0.99, by = 0.01)

df <- data.frame()

for (n in c(10, 100, 1000))
{
  l <- dbinom(0.9 * n, n, theta)
  df <- rbind(df, data.frame(theta = theta, L = l, N = n))
}

ggplot(df, aes(x = theta, y = log(L))) +
  geom_line() +
  facet_grid(. ~ N) +
  ylab('Log Likelihood') +
  ggtitle("Log Likelihood Function as N Increases")

#ggsave('fisher_information.png')

d <- c(0, 1, 1, 1, 1, 1, 1, 1, 1, 1)

likelihood <- function(d, theta)
{
  dbinom(sum(d), length(d), prob = theta)
}

df <- data.frame(theta = seq(0, 1, by = 0.01))
df <- transform(df, likelihood = likelihood(d, theta))

ggplot(df, aes(x = theta, y = likelihood)) +
  geom_line()

# Likelihoods do not integrate to 1 when integrating with respect to theta.
with(df[1:100, ], sum(likelihood) * 0.01)

## [1] 0.09082579

# beta(2, 3) prior
df <- transform(df, prior = dbeta(theta, 2, 3))

ggplot(df, aes(x = theta, y = prior)) +
  geom_line()

# Unnormalized posterior.
df <- transform(df, unnormalizedPosterior = prior * likelihood)

ggplot(df, aes(x = theta, y = unnormalizedPosterior)) +
  geom_line()

# Normalize so all integrate to 101.
#df <- transform(df, likelihood = likelihood * (101 / sum(likelihood)))
#df <- transform(df, prior = prior * (101 / sum(prior)))
df <- transform(df, normalizedPosterior = unnormalizedPosterior * (101 / sum(unnormalizedPosterior)))

# Visualize normalized versions.
library('reshape')
compare.df <- melt(df, id.vars = 'theta')

ggplot(compare.df, aes(x = theta, y = value, color = variable, group = variable)) +
  geom_line()

#ggsave('grid_approximation.png')

# Notice that posterior is more peaked than prior. This is because confidence has gone up, even though we've changed our minds.

# Compare with correct posterior, which is a beta(11, 4) distribution.
df <- transform(df, truePosterior = dbeta(theta, 11, 4))

# Visualize normalized versions.
compare.df <- melt(df, id.vars = 'theta')

compare.df <- subset(compare.df, variable %in% c('normalizedPosterior', 'truePosterior'))

ggplot(compare.df, aes(x = theta, y = value, color = variable, group = variable)) +
  geom_line()

#ggsave('grid_quality.png')

theta <- seq(0, 1, by = 0.01)

l <- dbinom(9, 10, theta)

df <- data.frame(theta = theta, L = l)

ggplot(df, aes(x = theta, y = L)) +
  geom_line() +
   ggtitle("Likelihood Function after 9 Mistaken Orders and 1 Correct Order")

#ggsave('likelihood_function.png')

ggplot(df, aes(x = theta, y = L)) +
  geom_line() +
  geom_vline(xintercept = 0.9, color = 'blue') +
  ggtitle("Likelihood Function after 9 Mistaken Orders and 1 Correct Order")

#ggsave('likelihood_function_mle.png')

theta <- seq(0, 1, by = 0.01)

df <- data.frame(theta = theta,
                 d = dbeta(theta, 2, 1))

ggplot(df, aes(x = theta, y = d)) +
  geom_line() +
  ylab('P(theta)') +
  ggtitle("Bayesian Estimate")

#ggsave('sample_beta.png')

sample.sizes <- 4 ^ seq(0, 3, by = 1)

theta <- 0.5

n.rep <- 5000

sampling.distribution <- data.frame()

for (sample.size in sample.sizes)
{
  for (sim in 1:n.rep)
  {
    estimator <- mean(rbinom(sample.size, 1, prob = theta))
    sampling.distribution <- rbind(sampling.distribution, data.frame(N = sample.size, I = sim, Estimator = estimator))
  }
}

ggplot(sampling.distribution, aes(x = Estimator)) +
  geom_histogram() +
  facet_grid(N ~ .) +
  xlab('Estimated Theta') +
  ggtitle("Sampling Distributions for Theta as N Grows")

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

#ggsave('sampling_distribution.png')