## MLE of a Normal Distribution

## estimate unknown parameter mu given sigma squared
likelihood.mu <- function(mu,sigma.squared = 1, x){    
  n <- length(x)
  const1 <- (2 * pi * sigma.squared) ^ ( - n * 0.5)
  const2 <- (- 1 / (2 * sigma.squared))
  term <- (x - mu) ^ 2
  likelihood.value <- const1 * exp(const2 * sum(term))
  return(likelihood.value)
}

n <- 100
set.seed(1)
x <- rnorm(n, mean = 0, sd = 1)  ##  data

mu.values <- seq(-1,1,length.out = 100)
like.values <- rep(0, length(mu.values))

for(i in 1:length(like.values)) {
  like.values[i] = likelihood.mu(mu.values[i], sigma.squared = 1, x = x)
}

plot(mu.values, like.values, type = "l", col = "red", lwd = 3.5)
abline(v = 0.1088874, col = "black", lwd = 3)

mean(x)
## [1] 0.1088874
var(x)
## [1] 0.8067621
## Both mu and sigma squared unknown
log.likelihood <- function(theta, x){    ## theta <- [mu, sigma ^ 2] (vector of mean and sd) ;
                                    ##  x <- vector of data
  mu <- theta[1]
  sigma.squared <- theta[2]
  n <- length(x)
  const1 <- (log(2 * pi * sigma.squared))* (- n * 0.5)
  const2 <- (- 1 / (2 * sigma.squared))
  term <- (x - mu) ^ 2
  likelihood.value <- const1 + (const2 * sum(term))
  return(-1 * likelihood.value)
}

##  Maximizing likelihood is same as minimizing negative log likelihood function
##  Optimizing 2 or more parameters - use optim; if 1 parameter use optimize
##  Optim minimises a function by varying its parameters. 
##  First argument - parameters I'd like to vary, par in this case; 
##  Second argument is the function to be minimised
##  By default optim searches for parameters, which minimise the function fn

n <- 100
set.seed(1)
x <- rnorm(n, mean = 0, sd = 1)  ##  data

##  Compute MLE and standard errors

likeli <- optim(par = c(0.1, 0.8), fn= log.likelihood, x = x, method = "BFGS", hessian = TRUE)
likeli$par
## [1] 0.1088875 0.7986940
likeli$hessian
##               [,1]          [,2]
## [1,]  1.252044e+02 -2.757616e-05
## [2,] -2.757616e-05  7.838153e+01
mean(x)
## [1] 0.1088874
var(x)*(n-1)/n
## [1] 0.7986945
stderror.mle <-  sqrt(diag(solve(likeli$hessian)))
stderror.mle
## [1] 0.08936968 0.11295180