产生一个正态分布的数据
set.seed(1001)
N <- 100
x <- rnorm(N, mean = 3, sd = 2)
mean(x)
## [1] 2.998305
sd(x)
## [1] 2.288979
定义似然函数
LL <- function(mu, sigma) {
R = dnorm(x, mu, sigma)
-sum(log(R))
}
library(stats4)
#由于计算时产生了负值,所以出现警告信息
mle(LL, start = list(mu = 1, sigma=1))
## Warning in dnorm(x, mu, sigma): 产生了NaNs
## Warning in dnorm(x, mu, sigma): 产生了NaNs
## Warning in dnorm(x, mu, sigma): 产生了NaNs
##
## Call:
## mle(minuslogl = LL, start = list(mu = 1, sigma = 1))
##
## Coefficients:
## mu sigma
## 2.998305 2.277506
#比如,mean=1,sd=-1,标准差不能为负数
dnorm(x, 1, -1)
## Warning in dnorm(x, 1, -1): 产生了NaNs
## [1] NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
## [18] NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
## [35] NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
## [52] NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
## [69] NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
## [86] NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
mle()调用optim(),mle()默认方法BFGS
library(stats4)
mle(LL, start = list(mu = 1, sigma=1), method = "L-BFGS-B", lower = c(-Inf, 0),
upper = c(Inf, Inf))
##
## Call:
## mle(minuslogl = LL, start = list(mu = 1, sigma = 1), method = "L-BFGS-B",
## lower = c(-Inf, 0), upper = c(Inf, Inf))
##
## Coefficients:
## mu sigma
## 2.998304 2.277506
修改似然函数
LL <- function(mu, sigma) {
R = suppressWarnings(dnorm(x, mu, sigma))
-sum(log(R))
}
mle(LL, start = list(mu = 1, sigma=1))
##
## Call:
## mle(minuslogl = LL, start = list(mu = 1, sigma = 1))
##
## Coefficients:
## mu sigma
## 2.998305 2.277506
#初始值选择错误,将对结果又较大影响
mle(LL, start = list(mu = 0, sigma=1))
##
## Call:
## mle(minuslogl = LL, start = list(mu = 0, sigma = 1))
##
## Coefficients:
## mu sigma
## 51.4840 226.8299
产生数据
x <- runif(N)
y <- 5 * x + 3 + rnorm(N)
fit <- lm(y ~ x)
summary(fit)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.96206 -0.59016 -0.00166 0.51813 2.43778
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.1080 0.1695 18.34 <2e-16 ***
## x 4.9516 0.2962 16.72 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8871 on 98 degrees of freedom
## Multiple R-squared: 0.7404, Adjusted R-squared: 0.7378
## F-statistic: 279.5 on 1 and 98 DF, p-value: < 2.2e-16
plot(x, y)
abline(fit, col = "red")
由于模型不是概率密度函数(pdf),无法像正态分布那样进行精确的似然函数的定义。由于线性模型要求残差符合正态分布,所以定义残差的似然函数
LL <- function(beta0, beta1, mu, sigma) {
# Find residuals
R = y - x * beta1 - beta0
# Calculate the likelihood for the residuals (with mu and sigma as parameters)
R = suppressWarnings(dnorm(R, mu, sigma))
# Sum the log likelihoods for all of the data points
-sum(log(R))
}
LL <- function(beta0, beta1, mu, sigma) {
R = y - x * beta1 - beta0
#
R = suppressWarnings(dnorm(R, mu, sigma, log = TRUE))
#
-sum(R)
}
#同样,初始值很重要,错误的选择将有问题
#fit <- mle(LL, start = list(beta0 = 3, beta1 = 1, mu = 0, sigma=1))
#fit <- mle(LL, start = list(beta0 = 4, beta1 = 2, mu = 0, sigma=1))
fit <- mle(LL, start = list(beta0 = 5, beta1 = 3, mu = 0, sigma=1))
summary(fit)
## Warning in sqrt(diag(object@vcov)): 产生了NaNs
## Maximum likelihood estimation
##
## Call:
## mle(minuslogl = LL, start = list(beta0 = 5, beta1 = 3, mu = 0,
## sigma = 1))
##
## Coefficients:
## Estimate Std. Error
## beta0 4.0540205 NaN
## beta1 4.9516167 0.29319257
## mu -0.9459795 NaN
## sigma 0.8782279 0.06209982
##
## -2 log L: 257.8177
fit <- mle(LL, start = list(beta0 = 2, beta1 = 1.5, sigma=1), fixed = list(mu = 0),
nobs = length(y))
summary(fit)
## Maximum likelihood estimation
##
## Call:
## mle(minuslogl = LL, start = list(beta0 = 2, beta1 = 1.5, sigma = 1),
## fixed = list(mu = 0), nobs = length(y))
##
## Coefficients:
## Estimate Std. Error
## beta0 3.1080361 0.16779400
## beta1 4.9516269 0.29319183
## sigma 0.8782257 0.06209942
##
## -2 log L: 257.8177
模型比较
AIC(fit)
## [1] 263.8177
BIC(fit)
## [1] 271.6332
logLik(fit)
## 'log Lik.' -128.9088 (df=3)
mle()函数调用失败的问题在于对求Hessian 矩阵的逆矩阵。stats4包中的mle()方法除了有较好的初始值外,没有更好的解决办法。bbmle包中的mle2()提供了更好的替代方法。
library(bbmle)
fit <- mle2(LL, start = list(beta0 = 3, beta1 = 1, mu = 0, sigma = 1))
summary(fit)
## Maximum likelihood estimation
##
## Call:
## mle2(minuslogl = LL, start = list(beta0 = 3, beta1 = 1, mu = 0,
## sigma = 1))
##
## Coefficients:
## Estimate Std. Error z value Pr(z)
## beta0 3.054021 0.083897 36.4019 <2e-16 ***
## beta1 4.951617 0.293193 16.8886 <2e-16 ***
## mu 0.054021 0.083897 0.6439 0.5196
## sigma 0.878228 0.062100 14.1421 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## -2 log L: 257.8177