yehang — Mar 28, 2014, 9:36 AM
function (x, centers, iter.max = 10, nstart = 1, algorithm = c("Hartigan-Wong",
"Lloyd", "Forgy", "MacQueen"), trace = FALSE)
{
do_one <- function(nmeth) {
switch(nmeth, {
isteps.Qtran <- 50 * m
iTran <- c(as.integer(isteps.Qtran), integer(max(0,
k - 1)))
Z <- .Fortran(C_kmns, x, m, p, centers = centers,
as.integer(k), c1 = integer(m), c2 = integer(m),
nc = integer(k), double(k), double(k), ncp = integer(k),
D = double(m), iTran = iTran, live = integer(k),
iter = iter.max, wss = double(k), ifault = as.integer(trace))
switch(Z$ifault, stop("empty cluster: try a better set of initial centers",
call. = FALSE), Z$iter <- max(Z$iter, iter.max +
1L), stop("number of cluster centres must lie between 1 and nrow(x)",
call. = FALSE), warning(gettextf("Quick-TRANSfer stage steps exceeded maximum (= %d)",
isteps.Qtran), call. = FALSE))
}, {
Z <- .C(C_kmeans_Lloyd, x, m, p, centers = centers,
k, c1 = integer(m), iter = iter.max, nc = integer(k),
wss = double(k))
}, {
Z <- .C(C_kmeans_MacQueen, x, m, p, centers = as.double(centers),
k, c1 = integer(m), iter = iter.max, nc = integer(k),
wss = double(k))
})
if (m23 <- any(nmeth == c(2L, 3L))) {
if (any(Z$nc == 0))
warning("empty cluster: try a better set of initial centers",
call. = FALSE)
}
if (Z$iter > iter.max) {
warning(sprintf(ngettext(iter.max, "did not converge in %d iteration",
"did not converge in %d iterations"), iter.max),
call. = FALSE, domain = NA)
if (m23)
Z$ifault <- 2L
}
if (nmeth %in% c(2L, 3L)) {
if (any(Z$nc == 0))
warning("empty cluster: try a better set of initial centers",
call. = FALSE)
}
Z
}
x <- as.matrix(x)
m <- as.integer(nrow(x))
if (is.na(m))
stop("invalid nrow(x)")
p <- as.integer(ncol(x))
if (is.na(p))
stop("invalid ncol(x)")
if (missing(centers))
stop("'centers' must be a number or a matrix")
nmeth <- switch(match.arg(algorithm), `Hartigan-Wong` = 1,
Lloyd = 2, Forgy = 2, MacQueen = 3)
storage.mode(x) <- "double"
if (length(centers) == 1L) {
if (centers == 1)
nmeth <- 3
k <- centers
if (nstart == 1)
centers <- x[sample.int(m, k), , drop = FALSE]
if (nstart >= 2 || any(duplicated(centers))) {
cn <- unique(x)
mm <- nrow(cn)
if (mm < k)
stop("more cluster centers than distinct data points.")
centers <- cn[sample.int(mm, k), , drop = FALSE]
}
}
else {
centers <- as.matrix(centers)
if (any(duplicated(centers)))
stop("initial centers are not distinct")
cn <- NULL
k <- nrow(centers)
if (m < k)
stop("more cluster centers than data points")
}
k <- as.integer(k)
if (is.na(k))
stop("'invalid value of 'k'")
iter.max <- as.integer(iter.max)
if (is.na(iter.max) || iter.max < 1)
stop("'iter.max' must be positive")
if (ncol(x) != ncol(centers))
stop("must have same number of columns in 'x' and 'centers'")
storage.mode(centers) <- "double"
Z <- do_one(nmeth)
best <- sum(Z$wss)
if (nstart >= 2 && !is.null(cn))
for (i in 2:nstart) {
centers <- cn[sample.int(mm, k), , drop = FALSE]
ZZ <- do_one(nmeth)
if ((z <- sum(ZZ$wss)) < best) {
Z <- ZZ
best <- z
}
}
centers <- matrix(Z$centers, k)
dimnames(centers) <- list(1L:k, dimnames(x)[[2L]])
cluster <- Z$c1
if (!is.null(rn <- rownames(x)))
names(cluster) <- rn
totss <- sum(scale(x, scale = FALSE)^2)
structure(list(cluster = cluster, centers = centers, totss = totss,
withinss = Z$wss, tot.withinss = best, betweenss = totss -
best, size = Z$nc, iter = Z$iter, ifault = Z$ifault),
class = "kmeans")
}
function (x, centers, iter.max = 10, nstart = 1, algorithm = c("Hartigan-Wong",
"Lloyd", "Forgy", "MacQueen"), trace = FALSE)
{
do_one <- function(nmeth) {
switch(nmeth, {
isteps.Qtran <- 50 * m
iTran <- c(as.integer(isteps.Qtran), integer(max(0,
k - 1)))
Z <- .Fortran(C_kmns, x, m, p, centers = centers,
as.integer(k), c1 = integer(m), c2 = integer(m),
nc = integer(k), double(k), double(k), ncp = integer(k),
D = double(m), iTran = iTran, live = integer(k),
iter = iter.max, wss = double(k), ifault = as.integer(trace))
switch(Z$ifault, stop("empty cluster: try a better set of initial centers",
call. = FALSE), Z$iter <- max(Z$iter, iter.max +
1L), stop("number of cluster centres must lie between 1 and nrow(x)",
call. = FALSE), warning(gettextf("Quick-TRANSfer stage steps exceeded maximum (= %d)",
isteps.Qtran), call. = FALSE))
}, {
Z <- .C(C_kmeans_Lloyd, x, m, p, centers = centers,
k, c1 = integer(m), iter = iter.max, nc = integer(k),
wss = double(k))
}, {
Z <- .C(C_kmeans_MacQueen, x, m, p, centers = as.double(centers),
k, c1 = integer(m), iter = iter.max, nc = integer(k),
wss = double(k))
})
if (m23 <- any(nmeth == c(2L, 3L))) {
if (any(Z$nc == 0))
warning("empty cluster: try a better set of initial centers",
call. = FALSE)
}
if (Z$iter > iter.max) {
warning(sprintf(ngettext(iter.max, "did not converge in %d iteration",
"did not converge in %d iterations"), iter.max),
call. = FALSE, domain = NA)
if (m23)
Z$ifault <- 2L
}
if (nmeth %in% c(2L, 3L)) {
if (any(Z$nc == 0))
warning("empty cluster: try a better set of initial centers",
call. = FALSE)
}
Z
}
x <- as.matrix(x)
m <- as.integer(nrow(x))
if (is.na(m))
stop("invalid nrow(x)")
p <- as.integer(ncol(x))
if (is.na(p))
stop("invalid ncol(x)")
if (missing(centers))
stop("'centers' must be a number or a matrix")
nmeth <- switch(match.arg(algorithm), `Hartigan-Wong` = 1,
Lloyd = 2, Forgy = 2, MacQueen = 3)
storage.mode(x) <- "double"
if (length(centers) == 1L) {
if (centers == 1)
nmeth <- 3
k <- centers
if (nstart == 1)
centers <- x[sample.int(m, k), , drop = FALSE]
if (nstart >= 2 || any(duplicated(centers))) {
cn <- unique(x)
mm <- nrow(cn)
if (mm < k)
stop("more cluster centers than distinct data points.")
centers <- cn[sample.int(mm, k), , drop = FALSE]
}
}
else {
centers <- as.matrix(centers)
if (any(duplicated(centers)))
stop("initial centers are not distinct")
cn <- NULL
k <- nrow(centers)
if (m < k)
stop("more cluster centers than data points")
}
k <- as.integer(k)
if (is.na(k))
stop("'invalid value of 'k'")
iter.max <- as.integer(iter.max)
if (is.na(iter.max) || iter.max < 1)
stop("'iter.max' must be positive")
if (ncol(x) != ncol(centers))
stop("must have same number of columns in 'x' and 'centers'")
storage.mode(centers) <- "double"
Z <- do_one(nmeth)
best <- sum(Z$wss)
if (nstart >= 2 && !is.null(cn))
for (i in 2:nstart) {
centers <- cn[sample.int(mm, k), , drop = FALSE]
ZZ <- do_one(nmeth)
if ((z <- sum(ZZ$wss)) < best) {
Z <- ZZ
best <- z
}
}
centers <- matrix(Z$centers, k)
dimnames(centers) <- list(1L:k, dimnames(x)[[2L]])
cluster <- Z$c1
if (!is.null(rn <- rownames(x)))
names(cluster) <- rn
totss <- sum(scale(x, scale = FALSE)^2)
structure(list(cluster = cluster, centers = centers, totss = totss,
withinss = Z$wss, tot.withinss = best, betweenss = totss -
best, size = Z$nc, iter = Z$iter, ifault = Z$ifault),
class = "kmeans")
}