library(formatR)
set.seed(5)
x <- rexp(10000, 5)pb2
1. The exponential distribution
a.
The support of ( X ) is \([0, \infty)\).
Reference:
[1] https://en.wikipedia.org/wiki/Exponential_distribution
[2] https://math.meta.stackexchange.com/questions/27874/cannot-find-how-to-write-infinity
b.
The log-likelihood function for ( n ) independent observations under the exponential model is:
\[
\ell(\theta) = n \ln \theta - \theta \sum_{i=1}^n x_i.
\]
Reference:
[1] https://www.statlect.com/glossary/log-likelihood
[2] https://rpubs.com/kylewbrown/math-expressions
c.
The maximum likelihood estimator (MLE) for \[{\theta}\] in the exponential distribution is:
\[ \hat{\theta} = \frac{n}{\sum_{i=1}^n x_i} = \frac{1}{\bar{x}}, \]
[1] https://bookdown.org/fmcron/Rhodes-template/math-expressions.html
d.
(1)
expr<-function(theta,x){
n<-length(x)
if(theta<=0) return(-Inf)
n*log(theta)-theta*sum(x)
}[1] Ward,et.al Maximum Likelihood for social science P14-16
(2)
set.seed(1234)
x<-rexp(10000,rate=5)
theta_vals<-seq(1,10,length.out=100)
loglike<-sapply(theta_vals,expr,x=x)
plot(theta_vals,loglike,col="black",lwd=2,xlab=expression(theta),ylab="Log-likelihood",main="Log-Likelihood of Exponential Distribution")From the plot, locate the value of \[{\theta}\] at which the log-likelihood is maximized. It should be approximately \[\hat{\theta}\] =5 as the true rate parameter used to generate the data was 5.
[2] https://www.dataquest.io/blog/apply-functions-in-r-sapply-lapply-tapply/
(3)
theta_hat<-1/mean(x)
theta_hat[1] 5.058202
(4)
theta1<-6
loghat<-expr(theta_hat,x)
loghat1<-expr(theta1,x)
ratio<-exp(loghat-loghat1)
ratio[1] 1.179729e+67
[1] Ward,et.al Maximum Likelihood for social science P32-36
(5)
#BFGS
neglike<-function(beta,x){
theta<-exp(beta)
n<-length(x)
-n*log(theta)+theta*sum(x)
}
set.seed(1234)
x<-rexp(10000,rate=5)
start_time<-system.time({
mle.fit<-optim(
par=1,
fn=neglike,
x=x,
method="BFGS",
control=list(
trace=TRUE,
maxit=1000,
fnscale=-1
) ,
hessian=TRUE
)
})initial value 4625.992354
final value -7401915.766793
converged
bfgs_time <-start_time["elapsed"]
thetabfgs <-mle.fit$par
se_beta<-sqrt(1/mle.fit$hessian)Warning in sqrt(1/mle.fit$hessian): NaNs produced
se_theta<-theta_hat*se_beta
print(bfgs_time)elapsed
0.002
print(thetabfgs)[1] -740.1957
print(se_theta) [,1]
[1,] NaN
#SANN
start_time<-system.time({
mle.fit<-optim(
par=1,
fn=neglike,
x=x,
method="SANN",
control=list(
trace=TRUE,
maxit=1000,
fnscale=-1
) ,
hessian=TRUE
)
})sann objective function values
initial value 4625.992354
iter 999 value -643722.498738
final value -643722.498738
sann stopped after 999 iterations
bfgs_time <-start_time["elapsed"]
thetabfgs <-mle.fit$par
se_beta<-sqrt(1/mle.fit$hessian)Warning in sqrt(1/mle.fit$hessian): NaNs produced
se_theta<-theta_hat*se_beta
print(bfgs_time)elapsed
0.011
print(thetabfgs)[1] -64.37225
print(se_theta) [,1]
[1,] NaN
[1] https://www.rdocumentation.org/packages/stats/versions/3.6.2/topics/optim
[2] Ward,et.al Maximum Likelihood for social science P14-16
[3] https://en.wikipedia.org/wiki/Exponential_distribution
[4] https://statisticsbyjim.com/probability/exponential-distribution/
2.
a.
mvn <- function(xy) {
x <- xy[1]
y <- xy[2]
z <- exp(-0.5 * ((x - 2)^2 + (y - 1)^2))
return(z)
}
# install.packages('lattice')
library(lattice)
y <- x <- seq(-5, 5, by = 0.1)
grid <- expand.grid(x, y)
names(grid) <- c("x", "y")
grid$z <- apply(grid, 1, mvn)
wireframe(z ~ x + y, data = grid, shade = TRUE, light.source = c(10, 0, 10), scales = list(arrows = FALSE))The function represents a 3D gaussian distribution centered at (2,1). Because the function is the exponential of a negative quadratic form, its maximum occurs at the center of the distribution (2,1) where (x - 2)^2 + (y - 1)^2 is minimized.
[1] https://en.wikipedia.org/wiki/Normal_distribution
b.
negmvn <- function(xy) {
x <- xy[1]
y <- xy[2]
z <- exp(-0.5 * ((x - 2)^2 + (y - 1)^2))
return(-z)
}
optim(c(1,0),negmvn,method = "BFGS")$par
[1] 2 1
$value
[1] -1
$counts
function gradient
14 6
$convergence
[1] 0
$message
NULL
optim(c(5,5),negmvn,method = "BFGS")$par
[1] 4.998888 4.998518
$value
[1] -3.761331e-06
$counts
function gradient
100 100
$convergence
[1] 1
$message
NULL
c.
logmvn<-function(xy){
x<-xy[1]
y<-xy[2]
z<--0.5 * ((x - 2)^2 + (y - 1)^2)
return(z)
}
grid$z <- apply(grid, 1,logmvn)
wireframe(z ~ x + y, data = grid, shade = TRUE, light.source = c(10, 0, 10), scales = list(arrows = FALSE))optim(c(1,0),logmvn,method = "BFGS",control=list(fnscale = -1))$par
[1] 2 1
$value
[1] -9.860761e-30
$counts
function gradient
6 2
$convergence
[1] 0
$message
NULL
optim(c(5,5),logmvn,method = "BFGS",control=list(fnscale = -1))$par
[1] 2 1
$value
[1] -7.888609e-30
$counts
function gradient
8 2
$convergence
[1] 0
$message
NULL
[1] https://stackoverflow.com/questions/15340658/using-wireframe-graphs-in-r
3.
a.
