Main functions
The algorithm is initialized by repeating k-means clustering, each repetition followed by a few iterations of the E and M steps. The initialization step relies on three functions: an M-step function adapted to the initialization, and the E-step and M-step functions used later in the EM algorithm.
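These functions rely on the packages below (an assumption read off the calls they make: glasso(), dmvnorm(), glmnet(), is.singular.matrix(), and the "detect_separation" method of glm()); the weighted Newton-Raphson fits NR_logit_w() and NR_logit_w_lasso() are defined elsewhere in the supplementary material.
library(glasso)            # glasso(): graphical lasso estimation of sparse precision matrices
library(mvtnorm)           # dmvnorm(): multivariate Gaussian density
library(glmnet)            # glmnet(): lasso-penalized logistic regression
library(matrixcalc)        # is.singular.matrix(): singularity check
library(detectseparation)  # provides method = "detect_separation" for glm()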
Maximization step used for the initialization:
MstepMMbinit = function(X, espt, rho, THETA, maxiter = 10, epsilon = 1e-5){
  n = dim(X)[1]
  p = dim(X)[2]
  M = dim(espt)[2]
  # pi: proportion of each cluster
  pik = sapply(1:M, function(k) (1/n) * sum(espt[, k]))
  # mu: weighted mean of X within each cluster (p x M matrix)
  mu = sapply(1:M, function(k) sapply(1:p, function(j) sum(espt[, k] * X[, j]) / sum(espt[, k])))
  # SIGMA: weighted covariance matrix of each cluster
  SIGMA = lapply(1:M, function(k) Reduce('+', lapply(1:n, function(i) sapply(1:p, function(j) sapply(1:p, function(l) (X[i, j] - mu[j, k]) * (X[i, l] - mu[l, k]))) * espt[i, k])) / sum(espt[, k]))
  # THETA: precision matrices, regularized by the graphical lasso when rho != 0.
  # A single glasso fit per cluster gives both the regularized covariance ($w)
  # and its inverse ($wi), so THETA is the inverse of the returned SIGMA
  if (sum(abs(rho)) != 0){
    gl = lapply(1:M, function(k) glasso(SIGMA[[k]], rho = rho[k]))
    SIGMA = lapply(gl, function(res) res$w)
    THETA = lapply(gl, function(res) res$wi)
  } else {THETA = lapply(1:M, function(k) solve(SIGMA[[k]]))}
  # Observed-data log-likelihood
  llik = sum(sapply(1:n, function(i) log(sum(sapply(1:M, function(k) pik[k] * dmvnorm(X[i, ], mean = mu[, k], sigma = SIGMA[[k]]))))))
  # BIC: count the free parameters of each symmetric precision matrix
  ddltheta = sapply(1:M, function(m) sum(abs(THETA[[m]]) > 0))
  npar.THETA = sum(sapply(1:M, function(m) (ddltheta[m] + p)/2))
  npar = M*p + M - 1 + npar.THETA
  BIC = -2 * llik + npar * log(n)
  return(list(pik = pik, mu = mu, SIGMA = SIGMA, THETA = THETA, llik = llik, BIC = BIC))
}
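In our notation (introduced here, not in the code), the parameter count behind the BIC returned above is
$$\mathrm{npar} = Mp + (M - 1) + \sum_{k=1}^{M} \frac{d_k + p}{2}, \qquad \mathrm{BIC} = -2\,\ell + \mathrm{npar}\,\log n,$$
where $d_k$ is the number of nonzero entries of $\Theta_k$, so that $(d_k + p)/2$ counts the free entries of a symmetric $p \times p$ matrix ($p$ diagonal terms plus half of the $d_k - p$ nonzero off-diagonal ones), $Mp$ counts the means, $M - 1$ the proportions, and $\ell$ is the log-likelihood.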
Maximization step: to estimate the parameters
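In our notation (writing $t_{ik}$ for espt[i, k]), the weighted updates implemented below are the standard ones:
$$\hat{\pi}_k = \frac{1}{n}\sum_{i=1}^{n} t_{ik}, \qquad \hat{\mu}_k = \frac{\sum_{i=1}^{n} t_{ik}\, x_i}{\sum_{i=1}^{n} t_{ik}}, \qquad \hat{\Sigma}_k = \frac{\sum_{i=1}^{n} t_{ik}\,(x_i - \hat{\mu}_k)(x_i - \hat{\mu}_k)^\top}{\sum_{i=1}^{n} t_{ik}},$$
while $\beta_k$ is updated by a logistic regression of $Y$ on $X$ weighted by the $t_{ik}$, penalized (glmnet) or not (Newton-Raphson), and $\hat{\Sigma}_k$, $\hat{\Theta}_k = \hat{\Sigma}_k^{-1}$ are regularized by the graphical lasso when $\rho_k \neq 0$.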
MstepMMb = function(Y, X, beta, espt, Intercept = FALSE, lasso, lambda, rho, THETA, maxiter = 10, epsilon = 1e-5){
  n = dim(X)[1]
  p = dim(X)[2]
  M = dim(espt)[2]
  # Hard classification, used below only to detect separation within clusters
  classetemp = apply(espt, 1, which.max)
  if (Intercept) {X1 = cbind(1, X)} else {X1 = X}
  # pi: proportion of each cluster
  pik = sapply(1:M, function(k) (1/n) * sum(espt[, k]))
  # mu: weighted mean of X within each cluster (p x M matrix)
  mu = sapply(1:M, function(k) sapply(1:p, function(j) sum(espt[, k] * X[, j]) / sum(espt[, k])))
  # SIGMA: weighted covariance matrix of each cluster
  SIGMA = lapply(1:M, function(k) Reduce('+', lapply(1:n, function(i) sapply(1:p, function(j) sapply(1:p, function(l) (X[i, j] - mu[j, k]) * (X[i, l] - mu[l, k]))) * espt[i, k])) / sum(espt[, k]))
  # THETA: precision matrices, regularized by the graphical lasso when rho != 0
  if (sum(abs(rho)) != 0){
    gl = lapply(1:M, function(k) glasso(SIGMA[[k]], rho = rho[k]))
    SIGMA = lapply(gl, function(res) res$w)
    THETA = lapply(gl, function(res) res$wi)
  } else {THETA = lapply(1:M, function(k) solve(SIGMA[[k]]))}
  # beta: one weighted logistic regression per cluster
  if (lasso == FALSE) {
    for (k in 1:M){
      # When the data of cluster k are separated, the unpenalized MLE does not
      # exist, so the penalized Newton-Raphson update is used instead
      if (glm(Y[which(classetemp == k)] ~ ., data = as.data.frame(X[which(classetemp == k), ]), family = binomial, method = "detect_separation")$separation) {
        beta[, k] = NR_logit_w_lasso(Y, X1, beta = beta[, k], w = espt[, k], lambda = .5)$beta
      } else {
        beta[, k] = NR_logit_w(Y, X1, beta = beta[, k], w = espt[, k])$beta
      }
    }
  } else {
    if (Intercept) {
      beta = sapply(1:M, function(k) as.matrix(coef(glmnet(X, Y, family = "binomial", weights = espt[, k], standardize = TRUE, lambda = lambda[k]))))
    } else {
      beta = sapply(1:M, function(k) as.matrix(coef(glmnet(X, Y, family = "binomial", weights = espt[, k], standardize = TRUE, lambda = lambda[k], intercept = FALSE))))
      beta = as.matrix(beta[-1, ])  # drop the (zero) intercept row returned by coef()
    }
  }
  # Observed-data log-likelihood
  pbx = sapply(1:M, function(k) sapply(1:n, function(i) exp(sum(X1[i, ] * beta[, k])) / (1 + exp(sum(X1[i, ] * beta[, k])))))
  llik = sum(sapply(1:n, function(i) log(sum(sapply(1:M, function(k) pik[k] * dbinom(Y[i], size = 1, prob = pbx[i, k]) * dmvnorm(X[i, ], mean = mu[, k], sigma = SIGMA[[k]]))))))
  return(list(pik = pik, mu = mu, SIGMA = SIGMA, THETA = THETA, beta = beta, llik = llik, lambda = lambda, rho = rho))
}
Expectation step: to compute the posterior probabilities
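In our notation, the posterior probability espt[i, k] computed below follows from Bayes' rule:
$$t_{ik} = \frac{\pi_k\, f(y_i \mid x_i; \beta_k)\, \phi(x_i; \mu_k, \Sigma_k)}{\sum_{z=1}^{M} \pi_z\, f(y_i \mid x_i; \beta_z)\, \phi(x_i; \mu_z, \Sigma_z)},$$
where $f(y \mid x; \beta) = p(x; \beta)^{y} (1 - p(x; \beta))^{1-y}$ with $p(x; \beta) = \exp(x^\top \beta) / (1 + \exp(x^\top \beta))$, and $\phi(\,\cdot\,; \mu, \Sigma)$ is the multivariate Gaussian density.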
EstepMMb = function(Y, X, pik, mu, SIGMA, beta, M, Intercept = FALSE){
  if (Intercept) {X1 = cbind(1, X)} else {X1 = X}
  n = dim(X1)[1]
  if (M == 1) {
    # With a single cluster every posterior probability equals 1
    espt = as.matrix(rep(1, nrow(X)))
  } else {
    # P(Y = 1 | X) under each cluster-specific logistic model
    pbx = sapply(1:M, function(k) sapply(1:n, function(i) exp(sum(X1[i, ] * beta[, k])) / (1 + exp(sum(X1[i, ] * beta[, k])))))
    # Posterior probability that observation i belongs to cluster k (Bayes' rule)
    espt = sapply(1:M, function(k) sapply(1:n, function(i) pik[k] * dbinom(Y[i], size = 1, prob = pbx[i, k]) * dmvnorm(X[i, ], mean = mu[, k], sigma = SIGMA[[k]]) / sum(sapply(1:M, function(z) pik[z] * dbinom(Y[i], size = 1, prob = pbx[i, z]) * dmvnorm(X[i, ], mean = mu[, z], sigma = SIGMA[[z]])))))
  }
  return(list(espt = espt))
}
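The E and M steps alternate until the log-likelihood stabilizes. The wrapper below is a minimal sketch under our own assumptions: the name run_EM(), the relative-change stopping rule, and the default penalties are illustrative and not part of the original code.
run_EM = function(Y, X, init, M, lasso = TRUE, lambda = rep(0.1, M), rho = rep(0.1, M), maxiter = 100, epsilon = 1e-5){
  # init: a list with components pik, mu, SIGMA, THETA and beta,
  # e.g. the output of init.stepMMb() defined below
  resM = init
  llik.old = -Inf
  for (it in 1:maxiter) {
    resE = EstepMMb(Y, X, pik = resM$pik, mu = resM$mu, SIGMA = resM$SIGMA, beta = resM$beta, M = M)
    resM = MstepMMb(Y, X, beta = resM$beta, espt = resE$espt, lasso = lasso, lambda = lambda, rho = rho, THETA = resM$THETA)
    # Stop when the log-likelihood no longer improves (relative criterion)
    if (abs(resM$llik - llik.old) < epsilon * (1 + abs(llik.old))) break
    llik.old = resM$llik
  }
  return(list(param = resM, espt = resE$espt, niter = it))
}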
Initialization function: chooses the initialization parameters among several repetitions of k-means clustering, each refined by a few iterations of the E and M steps; the repetition with the highest log-likelihood is retained.
init.stepMMb = function(Y, X, param.init, M, maxrep = 10, Intercept = FALSE, lasso = TRUE, lambda = rep(0, M), rho = rep(0, M)) {
  if (Intercept) {X1 = cbind(1, X)} else {X1 = X}
  n = dim(X1)[1]
  p = dim(X1)[2]
  pii0 = mui0 = SIGMAi0 = THETAi0 = betai0 = espti0 = classes0 = list()
  lliki0 = c()
  # param.init = 0 asks for the k-means initialization; otherwise a list of
  # starting values (pi, mu, SIGMA, THETA, beta) is expected
  if (!is.list(param.init)) {
    for (it in 1:maxrep){
      # k-means start
      resclas = kmeans(X, centers = M)
      pii0[[it]] = resclas$size/n
      mui0[[it]] = t(resclas$centers)
      # Remove observations forming a singleton cluster: their covariance
      # matrix cannot be estimated
      Iout = NULL
      for (k in 1:M) {
        if (sum(resclas$cluster == k) == 1) {Iout = c(Iout, which(resclas$cluster == k))}
      }
      if (is.null(Iout)) {
        classes = resclas$cluster
        Kout = M + 1
      } else {
        Ngr = length(Iout)
        Kout = resclas$cluster[Iout]
        Xout = X[Iout, , drop = FALSE]
        X = X[-Iout, , drop = FALSE]
        Y = Y[-Iout]
        n = n - Ngr
        classes = resclas$cluster[-Iout]
      }
      # SIGMA / THETA: empirical covariance of each cluster, with a lightly
      # regularized inverse when the empirical covariance is singular
      SIGMAi0[[it]] = lapply(1:M, function(k) var(X[classes == k, ]))
      THETAi0[[it]] = list()
      for (k in 1:M) {
        if (is.singular.matrix(SIGMAi0[[it]][[k]])) {
          THETAi0[[it]][[k]] = glasso(SIGMAi0[[it]][[k]], rho = 0.1)$wi
        } else {THETAi0[[it]][[k]] = solve(SIGMAi0[[it]][[k]])}
      }
      # Penalties: impose a small penalty when a cluster is too small for the
      # number of parameters, and strengthen it when the lasso is already used
      if (lasso == FALSE) {
        lambdai = rep(0, M); rhoi = rep(0, M)
        if (M > 1) {
          for (k in 1:M){if (p*(M+1) > length(which(classes == k))) {lambdai[k] = 0.01; rhoi[k] = 0.01}}
        }
        if (sum(abs(lambdai)) != 0 | sum(abs(rhoi)) != 0) {lasso = TRUE}
      } else {
        lambdai = lambda; rhoi = rho
        for (k in 1:M){if (p*(M+1) > length(which(classes == k))) {lambdai[k] = lambda[k]*5; rhoi[k] = rho[k]*5}}
      }
      # beta: lasso-penalized logistic regression within each cluster
      if (Intercept) {
        betai0[[it]] = sapply(1:M, function(k) as.matrix(coef(glmnet(X[classes == k, ], Y[classes == k], family = "binomial", standardize = TRUE, lambda = lambdai[k]))))
      } else {
        betai0[[it]] = sapply(1:M, function(k) as.matrix(coef(glmnet(X[classes == k, ], Y[classes == k], family = "binomial", standardize = TRUE, lambda = lambdai[k], intercept = FALSE))))
        betai0[[it]] = as.matrix(betai0[[it]][-1, ])  # drop the (zero) intercept row returned by coef()
      }
      # A few iterations of the EM algorithm from this start
      resE = EstepMMb(Y, X, pik = pii0[[it]], mu = as.matrix(mui0[[it]]), SIGMA = SIGMAi0[[it]], beta = betai0[[it]], M = M, Intercept = Intercept)
      resM = MstepMMb(Y, X, beta = betai0[[it]], espt = resE$espt, Intercept = Intercept, lasso = lasso, lambda = lambdai, rho = rhoi, THETA = THETAi0[[it]])
      repet = 1
      while (repet <= maxrep) {
        resE = EstepMMb(Y, X, pik = resM$pik, mu = resM$mu, SIGMA = resM$SIGMA, beta = resM$beta, M = M, Intercept = Intercept)
        resM = MstepMMb(Y, X, beta = resM$beta, espt = resE$espt, Intercept = Intercept, lasso = lasso, lambda = lambdai, rho = rhoi, THETA = resM$THETA)
        repet = repet + 1
      }
      pii0[[it]] = resM$pik
      mui0[[it]] = resM$mu
      SIGMAi0[[it]] = resM$SIGMA
      THETAi0[[it]] = resM$THETA
      betai0[[it]] = resM$beta
      espti0[[it]] = resE$espt
      lliki0[it] = resM$llik
      classes0[[it]] = sapply(1:n, function(i) which.max(resE$espt[i, ]))
    }
    # Keep the repetition with the highest log-likelihood
    choix = which.max(lliki0)
    pii = pii0[[choix]]
    mui = mui0[[choix]]
    SIGMAi = SIGMAi0[[choix]]
    THETAi = THETAi0[[choix]]
    betai = betai0[[choix]]
    espti = espti0[[choix]]
    llik = lliki0[choix]
    classifi = classes0[[choix]]
  } else {
    # User-supplied starting values: param.init = list(pi, mu, SIGMA, THETA, beta)
    pii = param.init[[1]]
    mui = param.init[[2]]
    SIGMAi = param.init[[3]]
    THETAi = param.init[[4]]
    betai = param.init[[5]]
    lambdai = lambda; rhoi = rho; Iout = NULL
    pbxi = sapply(1:M, function(k) sapply(1:n, function(i) exp(sum(X1[i, ] * betai[, k])) / (1 + exp(sum(X1[i, ] * betai[, k])))))
    espti = sapply(1:M, function(k) sapply(1:n, function(i) pii[k] * dbinom(Y[i], size = 1, prob = pbxi[i, k]) * dmvnorm(X[i, ], mean = mui[, k], sigma = SIGMAi[[k]]) / sum(sapply(1:M, function(z) pii[z] * dbinom(Y[i], size = 1, prob = pbxi[i, z]) * dmvnorm(X[i, ], mean = mui[, z], sigma = SIGMAi[[z]])))))
    llik = sum(sapply(1:n, function(i) log(sum(sapply(1:M, function(k) pii[k] * dbinom(Y[i], size = 1, prob = pbxi[i, k]) * dmvnorm(X[i, ], mean = mui[, k], sigma = SIGMAi[[k]]))))))
    classifi = sapply(1:n, function(i) which.max(espti[i, ]))
  }
  return(list(pik = pii, mu = mui, SIGMA = SIGMAi, THETA = THETAi, beta = betai, llik = llik, classifi = classifi, Iout = Iout, M = M, lambda = lambdai, rho = rhoi))
}
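A minimal end-to-end illustration, using the hypothetical run_EM() wrapper sketched after EstepMMb() above; the simulated data, dimensions and penalty values are assumptions chosen for the example only.
set.seed(1)
n = 200; p = 5; M = 2
X = rbind(matrix(rnorm(n/2 * p, mean = 0), ncol = p),
          matrix(rnorm(n/2 * p, mean = 2), ncol = p))
Y = rbinom(n, size = 1, prob = plogis(X[, 1] - X[, 2]))
# k-means initialization (param.init = 0), then the full EM run
init = init.stepMMb(Y, X, param.init = 0, M = M, maxrep = 5, lasso = TRUE, lambda = rep(0.1, M), rho = rep(0.1, M))
res = run_EM(Y, X, init = init, M = M, lambda = init$lambda, rho = init$rho)
res$param$llik                   # final log-likelihood
apply(res$espt, 1, which.max)    # estimated classification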