library(ggplot2)
df <- data.frame(theta = seq(0, 1, by = 0.01))
df <- transform(df, posterior = dbeta(theta, 11, 4))
ggplot(df, aes(x = theta, y = posterior)) +
geom_line()

#ggsave('beta_distribution.png')
theta <- seq(0.01, 0.99, by = 0.01)
df <- data.frame()
for (n in c(10, 100, 1000))
{
l <- dbinom(0.9 * n, n, theta)
df <- rbind(df, data.frame(theta = theta, L = l, N = n))
}
ggplot(df, aes(x = theta, y = log(L))) +
geom_line() +
facet_grid(. ~ N) +
ylab('Log Likelihood') +
ggtitle("Log Likelihood Function as N Increases")

#ggsave('fisher_information.png')
d <- c(0, 1, 1, 1, 1, 1, 1, 1, 1, 1)
likelihood <- function(d, theta)
{
dbinom(sum(d), length(d), prob = theta)
}
df <- data.frame(theta = seq(0, 1, by = 0.01))
df <- transform(df, likelihood = likelihood(d, theta))
ggplot(df, aes(x = theta, y = likelihood)) +
geom_line()

# Likelihoods do not integrate to 1 when integrating with respect to theta.
with(df[1:100, ], sum(likelihood) * 0.01)
## [1] 0.09082579
# beta(2, 3) prior
df <- transform(df, prior = dbeta(theta, 2, 3))
ggplot(df, aes(x = theta, y = prior)) +
geom_line()

# Unnormalized posterior.
df <- transform(df, unnormalizedPosterior = prior * likelihood)
ggplot(df, aes(x = theta, y = unnormalizedPosterior)) +
geom_line()

# Normalize so all integrate to 101.
#df <- transform(df, likelihood = likelihood * (101 / sum(likelihood)))
#df <- transform(df, prior = prior * (101 / sum(prior)))
df <- transform(df, normalizedPosterior = unnormalizedPosterior * (101 / sum(unnormalizedPosterior)))
# Visualize normalized versions.
library('reshape')
compare.df <- melt(df, id.vars = 'theta')
ggplot(compare.df, aes(x = theta, y = value, color = variable, group = variable)) +
geom_line()

#ggsave('grid_approximation.png')
# Notice that posterior is more peaked than prior. This is because confidence has gone up, even though we've changed our minds.
# Compare with correct posterior, which is a beta(11, 4) distribution.
df <- transform(df, truePosterior = dbeta(theta, 11, 4))
# Visualize normalized versions.
compare.df <- melt(df, id.vars = 'theta')
compare.df <- subset(compare.df, variable %in% c('normalizedPosterior', 'truePosterior'))
ggplot(compare.df, aes(x = theta, y = value, color = variable, group = variable)) +
geom_line()

#ggsave('grid_quality.png')
theta <- seq(0, 1, by = 0.01)
l <- dbinom(9, 10, theta)
df <- data.frame(theta = theta, L = l)
ggplot(df, aes(x = theta, y = L)) +
geom_line() +
ggtitle("Likelihood Function after 9 Mistaken Orders and 1 Correct Order")

#ggsave('likelihood_function.png')
ggplot(df, aes(x = theta, y = L)) +
geom_line() +
geom_vline(xintercept = 0.9, color = 'blue') +
ggtitle("Likelihood Function after 9 Mistaken Orders and 1 Correct Order")

#ggsave('likelihood_function_mle.png')
theta <- seq(0, 1, by = 0.01)
df <- data.frame(theta = theta,
d = dbeta(theta, 2, 1))
ggplot(df, aes(x = theta, y = d)) +
geom_line() +
ylab('P(theta)') +
ggtitle("Bayesian Estimate")

#ggsave('sample_beta.png')
sample.sizes <- 4 ^ seq(0, 3, by = 1)
theta <- 0.5
n.rep <- 5000
sampling.distribution <- data.frame()
for (sample.size in sample.sizes)
{
for (sim in 1:n.rep)
{
estimator <- mean(rbinom(sample.size, 1, prob = theta))
sampling.distribution <- rbind(sampling.distribution, data.frame(N = sample.size, I = sim, Estimator = estimator))
}
}
ggplot(sampling.distribution, aes(x = Estimator)) +
geom_histogram() +
facet_grid(N ~ .) +
xlab('Estimated Theta') +
ggtitle("Sampling Distributions for Theta as N Grows")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

#ggsave('sampling_distribution.png')