Problem 1

(a)

n = 20000
x = runif(n)
g = (1/(1+x^2))*exp(-x)    # integrand: e^(-x)/(1+x^2) on (0,1)
h = exp(-0.5)/(1+x^2)      # control variate with known mean mu
mu = pi/(4*exp(1/2))       # mu = integral of h over (0,1) = (pi/4)*exp(-1/2)
c.star = -cov(g,h)/var(h)  # optimal coefficient C*
theta.CV = (1/n)*sum(g+c.star*(h-mu))
theta.CV
## [1] 0.524842

(b)

# Standard Errors of theta.MC
a = 0
b = 1
SE.MC = (b-a)*sd(g)/sqrt(n)

# Correlation
cor(g,h)
## [1] 0.9738284
# Standard Errors of theta.CV
VAR.CV = (SE.MC)^2*(1-cor(g,h)^2)
SE.CV = sqrt(VAR.CV)
SE.CV
## [1] 0.0003939893

For $\theta=\int_{0}^{1}g(x)\,dx$ and a control variate $h$ with known mean $\int_{0}^{1}h(x)\,dx=\mu$, the control variate estimator is
\[\hat{\theta}_{CV}=\frac{1}{n}\sum_{i=1}^{n}\left[g(x_i)+C\,(h(x_i)-\mu)\right],\]
where $x_{1},\dots,x_{n}\overset{iid}{\sim}\mathrm{Unif}(0,1)$ and $C$ is a constant.

The variance of the estimator is
\[Var(\hat{\theta}_{CV})=\frac{1}{n}\left\{Var(g(X))+C^{2}\,Var(h(X))+2C\,Cov(g(X),h(X))\right\},\]
and the value of $C$ that minimizes it is
\[C^{*}=-\frac{Cov(g(X),h(X))}{Var(h(X))}.\]
Substituting $C^{*}$ gives
\[Var(\hat{\theta}_{CV^{*}})=\frac{1}{n}\left\{Var(g(X))-\frac{[Cov(g(X),h(X))]^{2}}{Var(h(X))}\right\}
=Var(\hat{\theta}_{MC})-\frac{1}{n}\{Corr(g(X),h(X))\}^{2}\,Var(g(X)).\]
Since $Var(\hat{\theta}_{MC})=\{SE(\hat{\theta}_{MC})\}^{2}=Var(g(X))/n$, this simplifies to
\[Var(\hat{\theta}_{CV^{*}})=\{SE(\hat{\theta}_{MC})\}^{2}\left(1-\{Corr(g(X),h(X))\}^{2}\right).\]
Plugging in $SE(\hat{\theta}_{MC})$ and $Corr(g(X),h(X))$ from the code above, $SE(\hat{\theta}_{CV^{*}})=\sqrt{Var(\hat{\theta}_{CV^{*}})}$.
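
With the sample correlation reported in (b), the variance-reduction factor works out to (this just plugs the reported value into the last identity)
\[1-\{Corr(g,h)\}^{2}\approx 1-(0.9738284)^{2}\approx 0.052,\]
so the control-variate estimator has roughly 5% of the variance of the plain Monte Carlo estimator.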

(c)

# Standard Errors of theta.CV
VAR.CV = (1/n)*var(g+(c.star)*(h-mu))
SE.CV = sqrt(VAR.CV)
SE.CV
## [1] 0.0003939893

Both values are the same, as expected: with the optimal coefficient c*, Var(g + c*(h - mu)) = Var(g)(1 - Corr(g,h)^2), so the formulas used in (b) and (c) are algebraically identical.

(d)

n = 20000
u = runif(n)
inv_function <- function(u){
  # inverse CDF of the importance density f(x) = 2*(1-x) on (0,1)
  x = 1-sqrt(1-u)
  return(x)
}
x = inv_function(u)
f = 2*(1-x)   # importance density evaluated at the draws

g = (1/(1+(x)^2))*exp(-x)
h = exp(-0.5)/(1+(x)^2)
mu = pi/(4*exp(1/2))
c.star = -cov(g/f,h/f)/var(h/f)
theta.CV = (1/n)*sum(g/f+c.star*(h/f-mu))
theta.CV
## [1] 0.5250716
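
The transformation in inv_function is the inverse-CDF method for the importance density $f(x)=2(1-x)$ on $(0,1)$:
\[F(x)=\int_{0}^{x}2(1-t)\,dt=2x-x^{2}=1-(1-x)^{2},
\qquad u=F(x)\ \Longrightarrow\ x=F^{-1}(u)=1-\sqrt{1-u}.\]
Because the draws now have density $f$, $E_{f}[g(X)/f(X)]=\theta$ and $E_{f}[h(X)/f(X)]=\mu$, so the same control-variate construction as in (a) applies with $g/f$ and $h/f$ in place of $g$ and $h$.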

(e)

VAR.CV = (1/n)*(var(g/f)+(c.star^2)*var(h/f)+2*c.star*cov(g/f,h/f))
SE.CV = sqrt(VAR.CV)
SE.CV
## [1] 0.0003992632

The standard error from (e) (0.000399) is slightly larger than the standard error from (c) (0.000394), so sampling from f(x) = 2(1-x) does not improve on the uniform-sampling control-variate estimator here.

Problem 2-1

(a)

n = 20000
x = rcauchy(n)            # importance draws from the standard Cauchy
x = x[which(x>0 & x<1)]   # keep only draws in (0,1); the rest contribute 0
f = dexp(x, rate=1)       # e^(-x) factor of the integrand
g = dcauchy(x)            # importance density
h = 1/(1+x^2)
theta.IS = (1/n)*sum(h*(f/g))   # divide by the full n, not length(x)
var.IS = (1/n)*var(h*(f/g))
S.E.IS = sqrt(var.IS)
theta.IS
## [1] 0.538098
S.E.IS
## [1] 0.003959627
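
The estimator in (a) uses the standard Cauchy as the importance density and keeps only the draws falling in (0,1); dividing the sum by the full n is still correct because the discarded draws contribute zero:
\[\theta=\int_{0}^{1}\frac{e^{-x}}{1+x^{2}}\,dx
=E_{X\sim\mathrm{Cauchy}}\!\left[\frac{h(X)f(X)}{g(X)}\,\mathbf{1}\{0<X<1\}\right],
\qquad g(x)=\frac{1}{\pi(1+x^{2})},\ f(x)=e^{-x},\ h(x)=\frac{1}{1+x^{2}},\]
so $\hat{\theta}_{IS}=\frac{1}{n}\sum_{i=1}^{n}\frac{h(x_i)f(x_i)}{g(x_i)}\,\mathbf{1}\{0<x_i<1\}$, which is exactly what the code computes.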

(b)

n = 20000
u = runif(n)
x = tan(pi*u/4)
f = dexp(x, rate=1)
g = 4/(pi*(1+x^2))
h = 1/(1+x^2)
theta.IS.2 = (1/n)*sum(h*(f/g))
var.IS.2 = (1/n)*var(h*(f/g))
S.E.IS.2 = sqrt(var.IS.2)
theta.IS.2
## [1] 0.5244159
S.E.IS.2
## [1] 0.0009935409
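
The draw x = tan(pi*u/4) is again the inverse-CDF method, this time for the importance density $g(x)=\frac{4}{\pi(1+x^{2})}$ on $(0,1)$:
\[G(x)=\int_{0}^{x}\frac{4}{\pi(1+t^{2})}\,dt=\frac{4}{\pi}\arctan(x),
\qquad u=G(x)\ \Longrightarrow\ x=\tan\!\left(\frac{\pi u}{4}\right).\]
This density follows the shape of the integrand more closely than the Cauchy in (a), which is consistent with the smaller standard error reported here.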

Problem 2-2

sqrtm <- function(A){
  # symmetric square root of A via its eigendecomposition
  a = eigen(A)
  sqm = a$vectors %*% diag(sqrt(a$values)) %*% t(a$vectors)
  (sqm + t(sqm))/2   # symmetrize to remove rounding error
}

gen <- function(n,p,mu,sig,seed){
  set.seed(seed)
  x = matrix(rnorm(n*p),n,p)
  ndata = x %*% sqrtm(sig) + matrix(mu,n,p,byrow = TRUE)
  ndata
}

n = 20000
p = 3
mu = c(0,0,0)
sig = matrix(c(1,3/5,1/3,3/5,1,11/15,1/3,11/15,1), nrow=3)
sig.inv = solve(sig)

y = gen(n,p,mu,sig,seed=1)

f_fun <- function(x){
  # unnormalized target density, proportional to (5 + x' sig.inv x)^(-4)
  x = matrix(x, ncol=1)
  v = (5+t(x)%*%sig.inv%*%x)^(-4)
  return(v)
}

h_fun <- function(x){
  # indicator of the event {Y1 <= 1, Y2 <= 4, Y3 <= 2}
  if(x[1] <= 1 && x[2] <= 4 && x[3] <= 2){
    return(1)
  } else {
    return(0)
  }
}

g_fun <- function(x){
  # multivariate normal N(mu, sig) sampling density; the normalizing
  # constant cancels in the self-normalized weights below
  v = (2*pi)^(-p/2) * det(sig)^(-1/2) * exp(-0.5 * t(x-mu) %*% sig.inv %*% (x-mu))
  return(as.numeric(v))
}

f = apply(y,1,f_fun)
h = apply(y,1,h_fun)
g = apply(y,1,g_fun)
w.star = f/g
w.sum = sum(w.star)

# true value of the probability to a good approximation = 0.79145379
theta.IS.3 = sum(h*w.star)/w.sum
theta.IS.3
## [1] 0.8045336
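
The estimator above is the self-normalized importance sampling estimator. Because f_fun is only proportional to the target density (its normalizing constant is unknown), the weights are normalized by their sum:
\[\hat{\theta}_{IS}=\frac{\sum_{i=1}^{n}h(y_i)\,w^{*}(y_i)}{\sum_{i=1}^{n}w^{*}(y_i)},
\qquad w^{*}(y)=\frac{f(y)}{g(y)},\]
where $g$ is the multivariate normal density the draws come from. Any constant multiplying $f$ (or $g$) cancels in this ratio.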

Problem 3

(a)

y = seq(-3,3,0.1)
g = exp(-abs(y))/2
f = dnorm(y)
plot(y, g, type="l", col="red", xlab="y", ylab="density",lwd=3)
lines(y, f, col="blue",lwd=3)
legend(1,0.5,c("double exp","std normal"),lwd=c(3,3),col=c("red","blue"))

As the plot shows, the double exponential density crosses the standard normal density at two values of y > 0 and, by symmetry, at two values of y < 0; neither density dominates the other everywhere, so a scaling constant alpha is needed before g/alpha can serve as an envelope for the standard normal.
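
For reference, the crossing points can be located numerically with uniroot; this is a small optional check, not part of the original solution.

# where the double exponential and standard normal densities cross (y > 0);
# by symmetry, the crossings for y < 0 are the negatives of these
diff_dens <- function(y) exp(-abs(y))/2 - dnorm(y)
uniroot(diff_dens, c(0.01, 1))$root   # roughly 0.26
uniroot(diff_dens, c(1, 3))$root      # roughly 1.74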

(b)

alpha_fun <- function(alpha,x){
  # decrease alpha (in steps of 0.01) until the envelope g/alpha
  # dominates the standard normal density at every grid point x
  g = exp(-abs(x))/2
  f = dnorm(x)
  e = g/alpha
  while(any(e<f) & alpha>0){
    alpha = alpha-0.01
    e = g/alpha
  }
  return(alpha)
}

x = seq(-3,3,length.out=1000)
alpha.ini = 1
alpha = alpha_fun(alpha=alpha.ini,x)
alpha
## [1] 0.76

To obtain $\alpha$ algebraically, set
\[\frac{e^{-|x|}/2}{\alpha}=\frac{1}{\sqrt{2\pi}}e^{-\frac{x^{2}}{2}}.\]
Taking logs,
\[\log\!\left(\frac{\sqrt{2\pi}}{2\alpha}\right)-|x|=-\frac{x^{2}}{2}
\quad\Longrightarrow\quad x^{2}-2|x|+2\log\!\left(\frac{\sqrt{2\pi}}{2\alpha}\right)=0.\]
At the largest admissible $\alpha$ the envelope is tangent to the normal density, so the discriminant must be zero:
\[D=b^{2}-4ac=4-8\log\!\left(\frac{\sqrt{2\pi}}{2\alpha}\right)=0
\quad\Longrightarrow\quad \log\!\left(\frac{\sqrt{2\pi}}{2\alpha}\right)=\frac{1}{2}
\quad\Longrightarrow\quad \alpha=\frac{\sqrt{2\pi}}{2e^{1/2}}\approx 0.7601735\approx 0.76.\]
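
A one-line numerical check of the closed form in base R (optional; the 0.01 grid search above rounds this down to 0.76):

sqrt(2*pi)/(2*sqrt(exp(1)))   # approximately 0.7602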

(c)

library(nimble)
## nimble version 0.13.2 is loaded.
## For more information on NIMBLE and a User Manual,
## please visit https://R-nimble.org.
## 
## Note for advanced users who have written their own MCMC samplers:
##   As of version 0.13.0, NIMBLE's protocol for handling posterior
##   predictive nodes has changed in a way that could affect user-defined
##   samplers in some situations. Please see Section 15.5.1 of the User Manual.
## 
## Attaching package: 'nimble'
## The following object is masked from 'package:stats':
## 
##     simulate
n = 10000
u = runif(n)
y = rdexp(n)             # proposal draws from the double exponential
f = dnorm(y)             # target density
e = ddexp(y)/alpha       # envelope: e(y) = g(y)/alpha >= f(y)
x = y[which(u <= f/e)]   # accept y with probability f(y)/e(y)
hist(x)
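
The theoretical acceptance probability of this rejection sampler is alpha (about 0.76), so roughly three quarters of the proposals should be kept; a quick optional check:

# empirical acceptance rate; should be close to alpha
length(x)/n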

Problem 4

(a)

library(lubridate)
## Warning: package 'lubridate' was built under R version 4.1.3
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(purrr)
## Warning: package 'purrr' was built under R version 4.1.3
data4 <- read.table("C:/RClass/coal.dat", header=TRUE)
attach(data4)
m = 10000
theta = rdunif(m,1,111)
## Warning: `rdunif()` was deprecated in purrr 1.0.0.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
a1 = rgamma(m,10,10)
a2 = rgamma(m,10,10)
lambda1 = rgamma(m,3,a1)
lambda2 = rgamma(m,3,a2)

# theta
Like = rep(NA, m)
for (i in 1:m){
  # likelihood of the i-th prior draw: Poisson(lambda1) before the change
  # point theta, Poisson(lambda2) after it
  L1 = prod(dpois(disasters[1:theta[i]], lambda1[i]))
  L2 = prod(dpois(disasters[(theta[i]+1):112], lambda2[i]))
  Like[i] = L1*L2
}
# normalized importance weights
W = Like/sum(Like)
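# Since (theta, lambda1, lambda2) are drawn from their priors, the importance
# weights for the posterior are proportional to the likelihood,
# W[i] = Like[i]/sum(Like). Resampling with these weights below (sampling
# importance resampling) gives an approximate sample from the posterior.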
n = 10000
resample1 <- sample(m,n,prob=W,replace=TRUE)
# posterior mean (theta)
mean(theta[resample1])
## [1] 38.6636
# credible interval (theta)
quantile(theta[resample1],probs=c(0.025, 0.975))
##  2.5% 97.5% 
##    36    44
# histogram (theta)
hist(theta[resample1],xlab="theta",main="Histogram of Theta")

# lambda1
# posterior mean (lambda1)
mean(lambda1[resample1])
## [1] 3.17331
# credible interval (lambda1)
quantile(lambda1[resample1],probs=c(0.025, 0.975))
##     2.5%    97.5% 
## 2.750273 3.670595
# histogram (lambda1)
hist(lambda1[resample1],xlab="lambda1",main="Histogram of Lambda1")

# lambda2
# posterior mean (lambda2)
mean(lambda2[resample1])
## [1] 0.9949581
# credible interval (lambda2)
quantile(lambda2[resample1],probs=c(0.025, 0.975))
##      2.5%     97.5% 
## 0.7981377 1.1662869
# histogram (lambda2)
hist(lambda2[resample1],xlab="lambda2", main="Histogram of Lambda2")

# scatter plot (lambda1/lambda2)
plot(lambda1, lambda2, type="p", col="blue", main="Scatter Plot of lambda1 vs. lambda2")
points(lambda1[resample1],lambda2[resample1], col="red")
legend("topright",c("initial sample","resampling sample"), text.col=c("blue","red"))

# Initial Sample Size is
print(m)  
## [1] 10000
# Resampling Sample Size is
print(n)  
## [1] 10000
# Number of Unique values(theta) is
length(unique(theta[resample1]))
## [1] 17
# Number of Unique values(lambda1) is
length(unique(lambda1[resample1]))
## [1] 65
# Number of Unique values(lambda2) is
length(unique(lambda2[resample1]))
## [1] 65
# Highest Observed Frequency(theta) is
max(table(theta[resample1]))
## [1] 3483
# Highest Observed Frequency(lambda1) is
max(table(lambda1[resample1]))
## [1] 1907
# Highest Observed Frequency(lambda2) is
max(table(lambda2[resample1]))
## [1] 1907
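
A companion diagnostic to the unique-value counts above is the effective sample size of the normalized importance weights (Kish's formula); this is an optional addition, not part of the original output, and a value far below m reflects the same weight degeneracy seen in the counts.

# effective sample size of the normalized weights
ESS = 1/sum(W^2)
ESS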