#Problem 1 Final Exam
n = 20000
# Draws from the standard normal proposal/importance density g
norms = rnorm(n, mean=0, sd=1)
# h(x): the function whose expectation under f we want, E_f[X^2]
h.fun <- function(x) {
return(x^2)
}
# f(x): the unnormalized target density, proportional to exp(-|x|^3/3)
f.fun <- function(x) {
val = -abs(x^3)/3
return(exp(val))
}
# g(x): the standard normal proposal density
g.fun <- function(x) {
val = dnorm(x, mean=0, sd=1)
return(val)
}
f.y = f.fun(norms)
g.y = g.fun(norms)
h.y = h.fun(norms)
# Unnormalized importance weights f/g; self-normalization handles the
# unknown normalizing constant of f
w.star = f.y/g.y
w = sum(w.star)
theta.IS = sum(h.y*w.star)/w
# The estimator is a sum of n approximately iid terms h.y*w.star/w,
# so its SE is sqrt(n) times the per-term standard deviation
SE.theta.IS = sqrt(n)*sd(h.y*w.star/w)
list.results = list(theta.hat = theta.IS, SE.theta.hat = SE.theta.IS)
list.results
## $theta.hat
## [1] 0.7774946
##
## $SE.theta.hat
## [1] 0.005453647
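As a sanity check on the self-normalized importance sampling estimate, \(\theta = E_f[X^2] = \int x^2 f(x)dx / \int f(x)dx\) can also be approximated by deterministic numerical integration of the unnormalized target. This is a quick verification sketch, not part of the original solution; its output should land near theta.hat.
# Numerically integrate the unnormalized target as a check on theta.hat
numer = integrate(function(x) x^2 * exp(-abs(x)^3/3), -Inf, Inf)$value
denom = integrate(function(x) exp(-abs(x)^3/3), -Inf, Inf)$value
numer/denom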
# Target: truncated exponential density on [0,1],
# f(x) = 100*exp(-100|x-0.05|) / (2 - exp(-5) - exp(-95))
fx = function(x) {
denom = 1 - (exp(-95) + exp(-5))/2
numer = exp(-100 * abs(x - 0.05))
val = 50*numer/denom
return(val)
}
# Proposal: Cauchy(location=0.05, scale=0.01) density
gx = function(x) {
denom = pi * (1 + (100*(x-0.05))^2)
val = 100/denom
return(val)
}
t = seq(0,1,length.out=500)
plot(1, type='n', xlim=c(0,1), ylim=c(0,50), xlab='Theta Values', ylab='Density')
lines(t, gx(t), col='blue')
lines(t, fx(t), col='red')
legend('topright', legend=c("Cauchy", "Trunc Exp"), text.col=c('blue', 'red'))
As the plot shows, in order to construct an envelope we need to find a scalar \(\alpha \in (0,1)\) such that \(\frac{g(x)}{\alpha} \ge f(x)\) for all \(x \in [0,1]\), where \(f(x)\) is the truncated exponential density and \(g(x)\) is the Cauchy density. We will search for this \(\alpha\) computationally.
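Equivalently, the envelope condition pins down the feasible values of \(\alpha\):
\[ e(x) = \frac{g(x)}{\alpha} \ge f(x) \ \text{ for all } x \in [0,1] \iff \alpha \le \min_{x\in[0,1]}\frac{g(x)}{f(x)} \]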
#want Cauchy to be an envelope for truncated exponential
# Shrink alpha in steps of 0.01 (which inflates the envelope g/alpha)
# until g(x)/alpha >= f(x) holds at every grid point
search.alpha <- function(alpha, x, g.x, f.x) {
gx = g.x(x)
fx = f.x(x)
bool = any(gx/alpha < fx)
while(bool & alpha > 0) {
alpha = alpha - 0.01
bool = any(gx/alpha < fx)
}
return(alpha)
}
x.vals = seq(0, 1, length.out=1000)
feasible.alpha = search.alpha(alpha=1, x = x.vals, g.x=gx, f.x=fx)
feasible.alpha
## [1] 0.63
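Since the feasible values are exactly \(\alpha \le \min_x g(x)/f(x)\), the grid search can be cross-checked with a direct computation (a quick check, not part of the original solution):
# Largest feasible alpha on the grid; should agree with search.alpha
# up to the 0.01 step size
min(gx(x.vals)/fx(x.vals))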
Our \(\alpha\) is equal to 0.63. Let’s check graphically if we now have an envelope.
t = seq(0,1,length.out=500)
plot(1, type='n', xlab='y', xlim=c(0,1), ylim=c(0,50), ylab='Density')
lines(t, gx(t), col='blue')
lines(t, fx(t), col='red')
lines(t, gx(t)/feasible.alpha, col='green')
legend('topright', legend=c("Cauchy", "Trunc Exp", 'Env'), text.col=c('blue', 'red', 'green'))
We can see that the green curve (the envelope) is now greater than or equal to all values of the red curve (the truncated exponential).
#Part (ii.)
#Generate Y from the envelope density Cauchy ~ g
#Generate U ~ Uniform(0,1)
#Reject Y if U > fx(Y)/e(Y), where fx is the density of the truncated exponential
#e(Y) = g(Y)/alpha
#Otherwise accept Y and set X = Y
theta.values = c()
counter = 0
total.counter = 0
while(counter < 10000) {
u = runif(1)
y = rcauchy(1, location=0.05, scale=0.01)
fy = fx(y)
ey = gx(y)/feasible.alpha
# Accept only proposals inside the support [0,1]: fx() as coded does not
# vanish outside [0,1], so unguarded Cauchy draws could be accepted there
if (y >= 0 && y <= 1 && u <= (fy/ey)) {
theta.values = c(theta.values, y)
counter = counter + 1
}
total.counter = total.counter + 1
}
hist(theta.values, freq=FALSE, xlab='values of theta', breaks=100, main='Histogram of 10,000 random values from f')
curve(fx, from=0, to=1, add=TRUE, col='red')
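For a normalized target with envelope \(g/\alpha\), the overall acceptance probability of rejection sampling is \(\alpha\) itself, so the empirical rate should be near 0.63 (a quick check, not in the original solution):
# Empirical acceptance rate of the rejection sampler
counter/total.counter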
#Problem 2 Part b
#computing importance weights: the likelihood theta^4 * (1-theta)^40,
#up to a multiplicative constant
w = (theta.values^4)*(1-theta.values)^40
w = w/sum(w)
# SIR: resample the draws from f with probability proportional to the weights
p_post = sample(theta.values, size=5000, replace=TRUE, prob=w)
hist(p_post, freq=TRUE, xlab='Posterior Theta Values', breaks=100, main='Posterior Frequency Distribution')
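For reference, posterior summaries can be read off the resampled draws (a usage sketch; these lines were not executed in the original write-up):
# Posterior mean and 95% credible interval for theta
mean(p_post)
quantile(p_post, c(0.025, 0.975))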
#Problem 2 Part C
# Monte Carlo estimate of the posterior predictive P(Y = 4) for
# Y ~ Binomial(50, theta), averaging over the posterior draws
val = sum((p_post^4)*((1-p_post)^46))
py4 = (1/5000)*(choose(50,4))*val
py4
## [1] 0.15507
Hence, the estimated probability that he hits 4 home runs in 50 at bats is approximately 0.155.
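The computation above is the Monte Carlo estimate of the posterior predictive probability,
\[ \hat{P}(Y=4) = \frac{1}{5000}\sum_{i=1}^{5000}\binom{50}{4}\theta_i^4(1-\theta_i)^{46}, \]
where the \(\theta_i\) are the resampled posterior draws.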
\[ \begin{align} f(x_1, ..., x_{112}|\lambda_1, \lambda_2) &= \prod_{j=1}^{112} f(x_j|\lambda_1, \lambda_2)\\ & \propto (\prod_{j=1}^{40}e^{-\lambda_1}\lambda_1^{x_j})(\prod_{j=41}^{112}e^{-\lambda_2}\lambda_2^{x_j})\\ &= (e^{-40\lambda_1}\lambda_1^{\sum_{j=1}^{40}x_j})(e^{-72\lambda_2}\lambda_2^{\sum_{j=41}^{112}x_j}) \end{align} \]
\[ \begin{align} f(\lambda_1, \lambda_2, \beta|x_1,...,x_{112}) &\propto f(x_1, .., x_{112}|\lambda_1, \lambda_2, \beta) * f(\lambda_1, \lambda_2|\beta)*f(\beta)\\ &\propto (e^{-40\lambda_1}\lambda_1^{\sum_{j=1}^{40}x_j})(e^{-72\lambda_2}\lambda_2^{\sum_{j=41}^{112}x_j})(\beta^3\lambda_1^2e^{-\beta\lambda_1})(\beta^3\lambda_2^2e^{-\beta\lambda_2})(\beta^9e^{-10\beta}) \end{align} \]
By focusing on the parts of the joint that depend on \(\beta\) we can write the conditional.
\[ \begin{align} f(\beta|\lambda_1, \lambda_2, x_1,...,x_{112})&\propto (\beta^3e^{-\beta\lambda_1})(\beta^3e^{-\beta\lambda_2})(\beta^9e^{-10\beta})\\ &= \beta^{16-1}e^{-\beta(\lambda_1+\lambda_2+10)} \end{align} \]
From the above we can see that this follows a \(Gamma(\alpha=16, \beta=\lambda_1 + \lambda_2 + 10)\) distribution.
Now we will focus on the parts of the joint that depend on \(\lambda_1\).
\[ \begin{align} f(\lambda_1|\lambda_2, \beta, x_1,...,x_{112})&\propto (e^{-40\lambda_1}\lambda_1^{\sum_{j=1}^{40}x_j})(\lambda_1^2e^{-\beta\lambda_1})\\ &=\lambda_1^{\sum_{j=1}^{40}x_j + 2}e^{-\lambda_1(\beta+40)}\\ &=\lambda_1^{\sum_{j=1}^{40}x_j + 3-1}e^{-\lambda_1(\beta+40)} \end{align} \]
From the above we can see that this follows a \(Gamma(\sum_{j=1}^{40}x_j+3, \beta+40)\) distribution.
Similarly, we can conclude that \(f(\lambda_2|\lambda_1, \beta, x_1, ..., x_{112})\) follows a \(Gamma(\sum_{j=41}^{112}x_j+3, \beta+72)\) distribution, where the 72 comes from the \(e^{-72\lambda_2}\) factor in the likelihood.
x = read.csv("coal.csv")
x = x$disasters
x.first40 = sum(x[1:40])
x.last72 = sum(x[41:112])
beta.vec = rep(NA, 10000)
lambda1.vec = rep(NA, 10000)
lambda2.vec = rep(NA, 10000)
beta.vec[1] = 1
for (i in 1:10000) {
# lambda1 | . ~ Gamma(sum of counts over years 1-40 + 3, beta + 40)
lambda1.vec[i] = rgamma(1, x.first40+3, beta.vec[i]+40)
# lambda2 | . ~ Gamma(sum of counts over years 41-112 + 3, beta + 72)
lambda2.vec[i] = rgamma(1, x.last72+3, beta.vec[i]+72)
if(i == 10000) {
break
} else {
# beta | lambda1, lambda2 ~ Gamma(16, lambda1 + lambda2 + 10)
beta.vec[i+1] = rgamma(1, 16, lambda1.vec[i] + lambda2.vec[i] + 10)
}
}
library(coda)
gibbs.draws = cbind(lambda1.vec, lambda2.vec, beta.vec)
mcmc.draws = mcmc(gibbs.draws)
summary(mcmc.draws)
##
## Iterations = 1:10000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 10000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## lambda1.vec 6.512 1.866 0.01866 0.012883
## lambda2.vec 3.507 1.032 0.01032 0.007294
## beta.vec 19.907 5.213 0.05213 0.032443
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## lambda1.vec 3.798 5.268 6.241 7.458 10.647
## lambda2.vec 1.996 2.796 3.361 4.050 5.827
## beta.vec 11.327 16.304 19.409 23.028 31.342
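Beyond the summary statistics, coda's effectiveSize() gives a quick read on how much autocorrelation the chain carries (a diagnostic sketch; output not shown in the original):
# Effective sample size per parameter; values near 10000 indicate
# nearly independent draws
effectiveSize(mcmc.draws)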
X = gibbs.draws
temp = apply(X, 2, cumsum)
means = temp/matrix(1:10000, 10000, 3)
plot(1:10000, means[,1], type='l', main='Running Mean of Lambda_1')
plot(1:10000, means[,2], type='l', main='Running Mean of Lambda_2')
plot(1:10000, means[,3], type='l', main='Running Mean of Beta')
Based on the running-mean plots, a burn-in of 2,000 iterations appears to be a safe choice.
hist(X[2001:10000,1], freq=FALSE, breaks=30, xlab='Values of Lambda_1', main='Density of Lambda_1 (burnt 2000 vals)')
library(mvtnorm)
set.seed(1010)
# Random-walk Metropolis sampler for a bivariate normal target with
# mean (1,2) and correlation 0.9; the proposal is a Gaussian step with
# diagonal covariance diag(delta1, delta2)
randomwalk <- function(nsim, delta1, delta2) {
X = matrix(NA, nrow=nsim, ncol=2)
X[1,] = c(0,0)
mu = c(1,2)
cov.sig = matrix(c(1, 0.9, 0.9, 1), nrow=2, byrow=TRUE)
diag.delta = matrix(c(delta1, 0, 0, delta2), nrow=2, byrow=TRUE)
ft = dmvnorm(X[1,], mean=mu, sigma=cov.sig)
accept = 0
for(i in 2:nsim) {
# Propose a symmetric Gaussian random-walk step
xs = rmvnorm(1, mean=X[(i-1),], sigma=diag.delta)
fs = dmvnorm(xs, mean=mu, sigma=cov.sig)
# Metropolis ratio; accept with probability min(1, r)
r = fs/ft
if(r >= 1 || (r < 1 && runif(1) < r)) {
X[i,] = xs
ft = fs
accept = accept + 1
} else {
X[i,] = X[(i-1),]
}
}
ratio = accept/nsim
list(X = X, ratio=ratio)
}
chain = randomwalk(nsim=10000, delta1=0.001, delta2=0.001)
summary(mcmc(chain$X))
##
## Iterations = 1:10000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 10000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## [1,] 0.1624 0.5642 0.005642 0.1980
## [2,] 1.1981 0.6693 0.006693 0.2736
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## var1 -1.0021 -0.2358 0.1874 0.5899 1.133
## var2 -0.2023 0.7943 1.2473 1.6527 2.377
chain$ratio
## [1] 0.9613
The acceptance rate of roughly 96% is far too high: with \(\delta_1 = \delta_2 = 0.001\) the proposal steps are tiny, so nearly every proposal is accepted but the chain moves very slowly through the support.
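One way to see the poor mixing directly is the lag-1 autocorrelation of each coordinate (a diagnostic sketch, not in the original; values near 1 would confirm the chain barely moves between iterations):
# Lag-1 autocorrelation of each coordinate of the chain
acf(chain$X[,1], lag.max=1, plot=FALSE)$acf[2]
acf(chain$X[,2], lag.max=1, plot=FALSE)$acf[2]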
X.burnt = chain$X[1001:10000,]
mcmc.burned = mcmc(X.burnt)
plot(mcmc.burned)
The density plots do not look correct. The marginal distributions of var1 and var2 should be normal with means 1 and 2 respectively (each with unit variance), but neither density looks normal.
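One quick way to see the mismatch is to overlay the theoretical \(N(1,1)\) marginal density on the first coordinate's histogram (an illustrative sketch, not part of the original solution):
# var1 should be approximately N(1,1); the overlay makes the mismatch visible
hist(X.burnt[,1], freq=FALSE, breaks=50, xlab='var1', main='var1 vs. N(1,1) density')
curve(dnorm(x, mean=1, sd=1), add=TRUE, col='red')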
library(scales)
MU = c(1,2)
SIGMA = matrix(c(1,0.9, 0.9, 1), nrow=2, byrow=TRUE)
sampled.vals = rmvnorm(9000, mean=MU, sigma=SIGMA)
plot(sampled.vals[,1], sampled.vals[,2], col=alpha('blue',0.5), xlab="X1", ylab="X2")
points(X.burnt[,1], X.burnt[,2], col=alpha('red', 0.5))
legend("topleft",legend=c("R generated", "MH generated"), text.col=c('blue', 'red'))
The values do not agree because the proposal covariance, a diagonal matrix with \(\delta_1 = 0.001\) and \(\delta_2 = 0.001\), is nowhere near the target covariance matrix. Following Muller (1991), the proposal covariance \(\Sigma\) should be chosen to be as close as possible to the covariance matrix of the target.
We will search for deltas with an acceptance ratio near 25%; per the lecture recommendations, this is an appropriate acceptance rate in higher dimensions. We will search delta values over the range 0.1 to 1.5.
delta1 = seq(0.1, 1.5, length.out=10)
delta2 = seq(0.1, 1.5, length.out=10)
# Note: cbind pairs the grids row-wise, so only equal step sizes
# (delta1 = delta2) are actually searched
delta.mat = cbind(delta1, delta2)
acceptance.ratios = rep(0,10)
for(i in 1:10) {
chain = randomwalk(nsim=10000, delta1=delta.mat[i,1], delta2=delta.mat[i,2])
acceptance.ratios[i] = chain$ratio
}
The code above is not executed here since it takes a few minutes to run, but running it separately shows that delta values of 1.5 produce an acceptance rate of approximately 25%.
Thus we will take \(\delta_1=1.5\) and \(\delta_2 =1.5\). Let’s see how our scatterplots align.
chain2 = randomwalk(nsim=10000, delta1=1.5, delta2=1.5)
chain2$ratio
## [1] 0.2629
X.burnt2 = chain2$X[1001:10000,]
plot(sampled.vals[,1], sampled.vals[,2], col=alpha('blue',0.5), xlab="X1", ylab="X2")
points(X.burnt2[,1], X.burnt2[,2], col=alpha('red', 0.5))
legend("topleft",legend=c("R generated", "MH generated"), text.col=c('blue', 'red'))
For contrast, consider much larger step sizes, which should overshoot the high-density region:
chain3 = randomwalk(nsim=10000, delta1=10, delta2=7)
chain3$ratio
## [1] 0.081
X.burnt3 = chain3$X[1001:10000,]
plot(X.burnt3[,1], X.burnt3[,2], col=alpha('red',0.5), xlab="X1", ylab="X2")
points(sampled.vals[,1], sampled.vals[,2], col=alpha('blue',0.5))
legend("topleft",legend=c("R generated", "MH generated"), text.col=c('blue', 'red'))
Empirically, the acceptance rate drops as the delta values grow. This makes sense: with a larger proposal variance, the proposed \(x_1\) and \(x_2\) coordinates are more likely to land far from the mean, where the target density is low, so those proposals are rarely accepted when evaluating the M-H ratio \(r\).