Lucas Schiffer
February 21, 2017
Data Analysis for the Life Sciences
# Toy example: six duck colours (five "yellow", one "red") and a six-entry
# colour palette.
ducks <- c("yellow", "yellow", "red", "yellow", "yellow", "yellow")
colors <- c("red", "green", "blue", "cyan", "magenta", "yellow")

# Expand ducks in place to 50 back-to-back copies (6 -> 300 labels).
ducks <- rep(ducks, times = 50)

# Split the labels into 10 folds (createFolds() is presumably caret's).
# NOTE(review): ducks is already the 300-label vector at this point, so this
# second rep() hands 15,000 labels to createFolds() -- confirm that is intended.
N <- createFolds(rep(ducks, times = 50), k = 10)

# M is simply a second name for the palette.
M <- colors
# Load the tissuesGeneExpression dataset. The statements below read `tissue`
# and `e`, which are presumably the objects this call provides -- TODO confirm.
data(tissuesGeneExpression)
# Positions of the samples whose tissue label is anything but "placenta".
ind <- which(tissue != "placenta")
# Class labels of the retained samples.
y <- tissue[ind]
# Keep the retained columns of `e` and transpose, so that every retained
# sample becomes one row of X.
X <- t(e[, ind])
# Seed the RNG so the fold assignment below is reproducible.
set.seed(1)
# Partition the labels into 10 folds; idx is later indexed with idx[[i]] to
# pick the held-out rows of fold i (createFolds() is presumably caret's).
idx <- createFolds(y, k = 10)
# Cross-validated misclassification rate of k-nearest-neighbour classification.
#
# For every candidate neighbourhood size in `ks`, each fold in `folds` is held
# out in turn: knn() is trained on the remaining rows of `features`, predicts
# the held-out rows, and the fraction of held-out labels it gets wrong is
# recorded. The per-fold fractions are then averaged.
#
# Arguments:
#   features  matrix with one row per sample
#   labels    class labels, one per row of `features`
#   folds     list of held-out row-index vectors (here: `idx`)
#   ks        candidate values for knn()'s `k`
#
# Returns a numeric vector with one mean misclassification rate per entry of
# `ks`, in the same order.
#
# knn() is presumably class::knn and must already be attached by the caller.
cv_knn_error <- function(features, labels, folds, ks) {
  vapply(ks, function(k) {
    fold_error <- vapply(folds, function(held_out) {
      # drop = FALSE keeps both subsets as matrices even when a fold (or the
      # feature matrix) has a single row or column.
      pred <- knn(
        train = features[-held_out, , drop = FALSE],
        test = features[held_out, , drop = FALSE],
        cl = labels[-held_out],
        k = k
      )
      # Share of held-out samples that were misclassified.
      mean(labels[held_out] != pred)
    }, numeric(1))
    # Average over the folds.
    mean(fold_error)
  }, numeric(1))
}

# First run. The original script read Xsmall here before it was ever assigned,
# so it failed in a clean session. A 2-D MDS projection (cmdscale()'s default
# dimensionality) is assumed, which makes the 5-D run below a distinct
# comparison -- TODO confirm the intended dimensionality.
Xsmall <- cmdscale(dist(X), k = 2)
# Re-seed before each run so both runs start from the same RNG state.
set.seed(1)
ks <- 1:12
res <- cv_knn_error(Xsmall, y, idx, ks)

# Second run: same procedure on a 5-D MDS projection. `res` is overwritten, so
# only these results remain once the script has finished.
Xsmall <- cmdscale(dist(X), k = 5)
set.seed(1)
ks <- 1:12
res <- cv_knn_error(Xsmall, y, idx, ks)