Una innovación clave en el modelamiento de datos relacionales es la incorporación de variables latentes en la forma de caracterĆsticas no observadas de los vĆ©rtices. Es decir, el uso de variables que no se observan directamente pero que son importantes para determinar las probabilidades de interacción.
Bajo este enfoque se tiene que las entradas de la matriz de adyacencia \(\mathbf{Y} = [y_{i,j}]\) asociada con un grafo \(G=(V,E)\) son condicionalmente independientes con distribución Bernoulli: \[ y_{i,j}\mid\mu,\boldsymbol{\beta},\mathbf{x}_{i,j},\boldsymbol{u}_i,\boldsymbol{u}_j \stackrel{\text{ind}}{\sim} \textsf{Bernoulli}\left( g\left( \mu + \boldsymbol{\beta}^{\textsf{T}}\mathbf{x}_{i,j} + \alpha(\boldsymbol{u}_i,\boldsymbol{u}_j) \right) \right) \] donde:
\(g(\cdot)\) es una función de enlace. Por ejemplo, la función logit inversa, \(\textsf{expit}(x)\), o la función de distribución de una Normal estÔndar, \(\Phi(x)\).
\(\mu\) es el intercepto.
\(\boldsymbol{\beta}=(\beta_1,\ldots,\beta_p)\) es un vector de coeficientes asociados con las covariables (variables exógenas) \(\mathbf{x}_{i,j}\).
\(\mathbf{x}_{i,j}=(x_{i,j,1},\ldots,x_{i,j,p})\) es un vector de covariables que se construyen a partir de las variables nodales.
\(\boldsymbol{u}_i=(u_{i,1},\ldots,u_{i,k})\) es un vector de variables latentes definidas en un espacio Euclidiano \(k\)-dimensional denominado Espacio Social.
\(\alpha(\cdot,\cdot)\) es una función simétrica:
Se debe especificar:
Por construcción, los modelos latentes tienen una especificación jerÔrquica, por lo que un enfoque Bayesiano de inferencia es natural.
Se acostumbra usar métodos de cadenas de Markov de Monte Carlo (MCMC, por sus siglas en inglés) o métodos Variacionales para explorar la distribución posterior de los parÔmetros del modelo.
Hoff, P. D., Raftery, A. E., & Handcock, M. S. (2002). Latent space approaches to social network analysis. Journal of the american Statistical association, 97(460), 1090-1098.
\[ y_{i,j}\mid\mu,\boldsymbol{u}_i,\boldsymbol{u}_j \stackrel{\text{ind}}{\sim} \textsf{Bernoulli}\left( \textsf{expit}\left( \mu - \| \boldsymbol{u}_i - \boldsymbol{u}_j \| \right) \right) \] donde:
Conjunto de datos de lazos matrimoniales y comerciales entre familias florentinas del Renacimiento.
Padgett, John F. 1994. Marriage and Elite Structure in Renaissance Florence, 1282-1500. Paper delivered to the Social Science History Association.
# librerias
suppressMessages(suppressWarnings(library(igraph)))
suppressMessages(suppressWarnings(library(latentnet)))
# datos
data(florentine, package = "ergm")
flomarriage
## Network attributes:
## vertices = 16
## directed = FALSE
## hyper = FALSE
## loops = FALSE
## multiple = FALSE
## bipartite = FALSE
## total edges= 20
## missing edges= 0
## non-missing edges= 20
##
## Vertex attribute names:
## priorates totalties vertex.names wealth
##
## No edge attributes
class(flomarriage)
## [1] "network"
# matriz de adyacencia
# remover vertice 12 porque es ailado (distancia Inf)
A <- network::as.matrix.network.adjacency(x = flomarriage)
index_rm <- which(colSums(A) == 0)
index_rm
## Pucci
## 12
A <- A[-index_rm,-index_rm]
class(A)
## [1] "matrix" "array"
# variable exogena: riqueza
wealth <- flomarriage %v% "wealth"
wealth <- wealth[-index_rm]
wealth
## [1] 10 36 55 44 20 32 8 42 103 48 49 27 10 146 48
# tipo igraph
g <- igraph::graph_from_adjacency_matrix(adjmatrix = A, mode = "undirected")
class(g)
## [1] "igraph"
# tipo network
flomarriage <- network::as.network.matrix(A)
class(flomarriage)
## [1] "network"
# grafico
set.seed(42)
plot(g, vertex.size = 1.2*sqrt(wealth), vertex.label = NA, main = "Familias")
# ajuste del modelo
# d = 2 : dimension latente bidimensional
# G = 0 : sin factores de agrupamieno
fit <- ergmm(formula = flomarriage ~ euclidean(d = 2, G = 0), seed = 42)
# resumen del ajuste
summary(fit)
## NOTE: It is not certain whether it is appropriate to use latentnet's BIC to select latent space dimension, whether or not to include actor-specific random effects, and to compare clustered models with the unclustered model.
##
## ==========================
## Summary of model fit
## ==========================
##
## Formula: flomarriage ~ euclidean(d = 2, G = 0)
## Attribute: edges
## Model: Bernoulli
## MCMC sample of size 4000, draws are 10 iterations apart, after burnin of 10000 iterations.
## Covariate coefficients posterior means:
## Estimate 2.5% 97.5% 2*min(Pr(>0),Pr(<0))
## (Intercept) 4.9736 2.3781 8.1291 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Overall BIC: 243.2747
## Likelihood BIC: 82.68488
## Latent space/clustering BIC: 160.5899
##
## Covariate coefficients MKL:
## Estimate
## (Intercept) 3.017229
# cadena verosimilitud
x <- c(fit$sample$lpY)
par(mfrow = c(1,1), mar = c(3,3,1.4,1.4), mgp = c(1.75,.75,0))
plot(x = 1:length(x), y = x, type = "l", xlab = "Iteración", ylab = "Log-verosimilitud", col = "darkgray", main = "Cadena")
abline(h = mean(x), col = 4, lty = 2, lwd = 2)
abline(h = quantile(x, c(0.025,0.975)), col = 2, lty = 3, lwd = 2)
# cadena e histograma beta
par(mfrow = c(1,2), mar = c(3,3,1.4,1.4), mgp = c(1.75,.75,0))
x <- c(fit$sample$beta)
# cadena
plot(x = 1:length(x), y = x, type = "l", xlab = "Iteración", ylab = expression(beta), col = "darkgray", main = "Cadena")
abline(h = mean(x), col = 4, lty = 2, lwd = 2)
abline(h = quantile(x, c(0.025,0.975)), col = 2, lty = 3, lwd = 2)
# histograma
hist(x = x, freq = F, col = "gray90", border = "gray90", xlab = expression(beta), ylab = "Densidad", main = "Distr. marginal")
abline(v = mean(x), col = 4, lty = 2, lwd = 2)
abline(v = quantile(x, c(0.025,0.975)), col = 2, lty = 3, lwd = 2)
# media posterior del intercepto
beta_pm <- mean(fit$sample$beta)
beta_pm
## [1] 4.973614
# probabilidad de interaccion basal
1/(1 + exp(-beta_pm))
## [1] 0.9931294
# muestras posiciones latentes
# transformacion de procrustes
B <- dim(fit$sample$Z)[1] # no. de muestras MCMC
n <- dim(fit$sample$Z)[2] # no. vertices
d <- dim(fit$sample$Z)[3] # dimension latente
U0 <- scale(fit$mcmc.mle$Z, T, T)
U.array <- array(data = NA, dim = c(B,n,d))
for (b in 1:B)
U.array[b,,] <- MCMCpack::procrustes(X = scale(fit$sample$Z[b,,], T, T), Xstar = U0, translation = T, dilation = T)$X.new
U.pm <- apply(X = U.array, MARGIN = c(2, 3), FUN = mean)
# colores
rr <- atan2(U0[,2], U0[,1])
rr <- rr+abs(min(rr))
rr <- rr/max(rr)
gg <- 1 - rr
bb <- U0[,2]^2 + U0[,1]^2
bb <- bb/max(bb)
aa <- 0.4
# grafico adelgazando la cadena cada 8 obs
nthin <- 8
index_thin <- seq(from = nthin, to = B, by = nthin)
plot(NA, NA, cex.axis = 0.7, xlab = "Dimension 1", ylab = "Dimension 2", type = "n", xlim = range(U.array), ylim = range(U.array), main = "Posiciones latentes")
for (i in 1:n) points(U.array[index_thin,i,1], U.array[index_thin,i,2], pch = 15, cex = 0.3, col = rgb(rr[i], gg[i], bb[i], aa))
for (i in 1:n) text(x = U.pm[i,1], y = U.pm[i,2], labels = i, col = 1, cex = 1.1, font = 2)
# posiciones latentes
set.seed(42)
plot(fit, what = "pmean", print.formula = F, main = "Media post. posiciones latentes")
# funcion expit
expit <- function(x) 1/(1+exp(-x))
# probabilidades de interaccion (pedia posterior)
Pi <- matrix(0, n, n)
for (b in 1:B) {
bet <- fit$sample$beta[b]
for (i in 1:(n-1)) {
for (j in (i+1):n) {
lat <- sqrt(sum((fit$sample$Z[b,i,] - fit$sample$Z[b,j,])^2))
Pi[i,j] <- Pi[j,i] <- Pi[i,j] + expit(bet - lat)/B
}
}
}
# grafico
rownames(A) <- colnames(A) <- 1:n
par(mfrow = c(1,2))
corrplot::corrplot(corr = A, type = "full", col.lim = c(0,1), method = "shade", addgrid.col = "gray90", tl.col = "black")
corrplot::corrplot(corr = Pi, type = "full", col.lim = c(0,1), method = "shade", addgrid.col = "gray90", tl.col = "black")
# bondad de ajuste distancia geodesica
fit.gof <- gof(fit, GOF = ~odegree + distance)
plot(fit.gof)
# bondad de ajuste
B <- dim(fit$sample$Z)[1]
n <- dim(fit$sample$Z)[2]
d <- dim(fit$sample$Z)[3]
stat <- matrix(NA, B, 6)
set.seed(42)
for (b in 1:B) {
# intercepto
bet <- fit$sample$beta[b]
# simular datos
Ar <- matrix(0, n, n)
for (i in 1:(n-1)) {
for (j in (i+1):n){
lat <- sqrt(sum((fit$sample$Z[b,i,] - fit$sample$Z[b,j,])^2))
Ar[i,j] <- Ar[j,i] <- rbinom(n = 1, size = 1, prob = expit(bet - lat))
}
}
gr <- igraph::graph_from_adjacency_matrix(adjmatrix = Ar, mode = "undirected")
# calcular estadisticos
stat[b,1] <- igraph::edge_density(graph = gr, loops = F)
stat[b,2] <- igraph::transitivity(graph = gr, type = "global")
stat[b,3] <- igraph::assortativity_degree(graph = gr, directed = F)
stat[b,4] <- igraph::mean_distance(graph = gr, directed = F)
stat[b,5] <- mean(igraph::degree(graph = gr))
stat[b,6] <- sd(igraph::degree(graph = gr))
}
# valores observados
dens_obs <- igraph::edge_density(graph = g, loops = F)
tran_obs <- igraph::transitivity(graph = g, type = "global")
asso_obs <- igraph::assortativity_degree(graph = g, directed = F)
mdis_obs <- igraph::mean_distance(graph = g, directed = F)
mdeg_obs <- mean(igraph::degree(graph = g))
sdeg_obs <- sd(igraph::degree(graph = g))
# graficos
par(mfrow = c(2,3))
hist(x = stat[,1], freq = F, col = "gray90", border = "gray90", xlim = range(stat[,1]), xlab = "Densidad", ylab = "Densidad", main = "Densidad")
abline(v = dens_obs, col = 4, lty = 2)
abline(v = quantile(stat[,1], c(0.025, 0.975)), lty = 3, col = 2)
hist(x = stat[,2], freq = F, col = "gray90", border = "gray90", xlim = range(stat[,2]), xlab = "Transitividad", ylab = "Densidad", main = "Transitividad")
abline(v = tran_obs, col = 4, lty = 2)
abline(v = quantile(stat[,2], c(0.025, 0.975)), lty = 3, col = 2)
hist(x = stat[,3], freq = F, col = "gray90", border = "gray90", xlim = range(stat[,3]), xlab = "Asortatividad", ylab = "Densidad", main = "Asortatividad")
abline(v = asso_obs, col = 4, lty = 2)
abline(v = quantile(stat[,3], c(0.025, 0.975)), lty = 3, col = 2)
hist(x = stat[,4], freq = F, col = "gray90", border = "gray90", xlim = range(stat[,4]), xlab = "Distancia prom.", ylab = "Densidad", main = "Distancia prom.")
abline(v = mdis_obs, col = 4, lty = 2)
abline(v = quantile(stat[,4], c(0.025, 0.975)), lty = 3, col = 2)
hist(x = stat[,5], freq = F, col = "gray90", border = "gray90", xlim = range(stat[,5]), xlab = "Grado prom.", ylab = "Densidad", main = "Grado prom.")
abline(v = mdeg_obs, col = 4, lty = 2)
abline(v = quantile(stat[,5], c(0.025, 0.975)), lty = 3, col = 2)
hist(x = stat[,6], freq = F, col = "gray90", border = "gray90", xlim = range(stat[,6]), xlab = "Grado DE", ylab = "Densidad", main = "Grado DE")
abline(v = sdeg_obs, col = 4, lty = 2)
abline(v = quantile(stat[,6], c(0.025, 0.975)), lty = 3, col = 2)
# valores p
round(mean(stat[,1] > dens_obs), 4)
## [1] 0.394
round(mean(stat[,2] > tran_obs), 4)
## [1] 0.9155
round(mean(stat[,3] > asso_obs), 4)
## [1] 0.972
round(mean(stat[,4] > mdis_obs), 4)
## [1] 0.486
round(mean(stat[,5] > mdeg_obs), 4)
## [1] 0.394
round(mean(stat[,6] > sdeg_obs), 4)
## [1] 0.5432
Hoff, P. D., Raftery, A. E., & Handcock, M. S. (2002). Latent space approaches to social network analysis. Journal of the american Statistical association, 97(460), 1090-1098.
\[ y_{i,j}\mid\mu, \boldsymbol{\beta}, \mathbf{x}_{i,j}, \boldsymbol{u}_i,\boldsymbol{u}_j \stackrel{\text{ind}}{\sim} \textsf{Bernoulli}\left( \textsf{expit}\left( \mu + \boldsymbol{\beta}^{\textsf{T}}\mathbf{x}_{i,j} - \| \boldsymbol{u}_i - \boldsymbol{u}_j \| \right) \right) \] donde:
# librerias
suppressMessages(suppressWarnings(library(igraph)))
suppressMessages(suppressWarnings(library(latentnet)))
# datos
data(florentine, package = "ergm")
flomarriage
## Network attributes:
## vertices = 16
## directed = FALSE
## hyper = FALSE
## loops = FALSE
## multiple = FALSE
## bipartite = FALSE
## total edges= 20
## missing edges= 0
## non-missing edges= 20
##
## Vertex attribute names:
## priorates totalties vertex.names wealth
##
## No edge attributes
# clase
class(flomarriage)
## [1] "network"
# matriz de adyacencia
# remover vertice 12 porque es ailado (distancia Inf)
A <- network::as.matrix.network.adjacency(x = flomarriage)
index_rm <- which(colSums(A) == 0)
index_rm
## Pucci
## 12
A <- A[-index_rm,-index_rm]
class(A)
## [1] "matrix" "array"
# variable exogena: riqueza
wealth <- flomarriage %v% "wealth"
wealth <- wealth[-index_rm]
wealth
## [1] 10 36 55 44 20 32 8 42 103 48 49 27 10 146 48
# tipo igraph
g <- igraph::graph_from_adjacency_matrix(adjmatrix = A, mode = "undirected")
class(g)
## [1] "igraph"
# tipo network
flomarriage <- network::as.network.matrix(A)
class(flomarriage)
## [1] "network"
# covariables
x <- abs(outer(X = wealth, Y = wealth, FUN = "-"))
# ajuste del modelo
fit <- ergmm(formula = flomarriage ~ euclidean(d = 2, G = 0) + edgecov(x), seed = 42)
# resumen del ajuste
summary(fit)
##
## ==========================
## Summary of model fit
## ==========================
##
## Formula: flomarriage ~ euclidean(d = 2, G = 0) + edgecov(x)
## Attribute: edges
## Model: Bernoulli
## MCMC sample of size 4000, draws are 10 iterations apart, after burnin of 10000 iterations.
## Covariate coefficients posterior means:
## Estimate 2.5% 97.5% 2*min(Pr(>0),Pr(<0))
## (Intercept) 4.759504 1.736548 8.3241 5e-04 ***
## edgecov.x 0.078559 0.032922 0.1379 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Overall BIC: 236.582
## Likelihood BIC: 57.20479
## Latent space/clustering BIC: 179.3772
##
## Covariate coefficients MKL:
## Estimate
## (Intercept) 2.62752498
## edgecov.x 0.04428907
# cadena e histograma beta
par(mfrow = c(1,2), mar = c(3,3,1.4,1.4), mgp = c(1.75,.75,0))
x <- c(fit$sample$beta[,2])
# cadena
plot(x = 1:length(x), y = x, type = "l", xlab = "Iteración", ylab = expression(beta), col = "darkgray", main = "Cadena")
abline(h = mean(x), col = 4, lty = 2, lwd = 2)
abline(h = quantile(x, c(0.025,0.975)), col = 2, lty = 3, lwd = 2)
# histograma
hist(x = x, freq = F, col = "gray90", border = "gray90", xlab = expression(beta), ylab = "Densidad", main = "Distr. marginal")
abline(v = mean(x), col = 4, lty = 2, lwd = 2)
abline(v = quantile(x, c(0.025,0.975)), col = 2, lty = 3, lwd = 2)
# media posterior del intercepto
beta_pm <- mean(fit$sample$beta[,2])
beta_pm
## [1] 0.07855933
Krivitsky, P. N., Handcock, M. S., Raftery, A. E., & Hoff, P. D. (2009). Representing degree distributions, clustering, and homophily in social networks with latent cluster random effects models. Social networks, 31(3), 204-213.
\[ y_{i,j}\mid\mu,\boldsymbol{u}_i,\boldsymbol{u}_j \stackrel{\text{ind}}{\sim} \textsf{Bernoulli}\left( \textsf{expit}\left( \mu - \| \boldsymbol{u}_i - \boldsymbol{u}_j \| \right) \right) \] donde:
Sampson (1969) registró las interacciones sociales entre un grupo de monjes mientras Ć©l era un residente como experimentador en el claustro. De particular interĆ©s son los datos sobre relaciones de afecto positivo (āagradoā), en los que se preguntó a cada monje si tenĆa relaciones positivas con cada uno de los demĆ”s monjes. Cada monje clasificó solo sus tres opciones principales (o cuatro, en el caso de empates) en āagradoā. AquĆ, consideramos que existe auna arista dirigida del monje A al monje B si A nombró a B entre estas opciones principales.
Sampson, S.~F. (1968), A novitiate in a period of change: An experimental and case study of relationships, Unpublished Ph.D.Ā dissertation, Department of Sociology, Cornell University.
# datos
data(sampson)
samplike
## Network attributes:
## vertices = 18
## directed = TRUE
## hyper = FALSE
## loops = FALSE
## multiple = FALSE
## total edges= 88
## missing edges= 0
## non-missing edges= 88
##
## Vertex attribute names:
## cloisterville group vertex.names
##
## Edge attribute names:
## nominations
class(samplike)
## [1] "network"
# matriz de adyacencia
# no hay vertices conectados
A <- network::as.matrix.network.adjacency(x = samplike)
class(A)
## [1] "matrix" "array"
# tipo igraph
g <- igraph::graph_from_adjacency_matrix(adjmatrix = A, mode = "directed")
class(g)
## [1] "igraph"
# grafico
set.seed(42)
plot(g, vertex.size = 10, vertex.label = NA, edge.arrow.size = 0.5, main = "Monjes")
# ajuste del modelo para varios valores de G
fit1 <- ergmm(samplike ~ euclidean(d = 2, G = 1))
fit2 <- ergmm(samplike ~ euclidean(d = 2, G = 2))
fit3 <- ergmm(samplike ~ euclidean(d = 2, G = 3))
fit4 <- ergmm(samplike ~ euclidean(d = 2, G = 4))
# modelos
fits <- list(fit1, fit2, fit3, fit4)
# calcular BICs
bics <- reshape(as.data.frame(t(sapply(fits, function(x) c(G = x$model$G, unlist(bic.ergmm(x))[c("Y","Z","overall")])))),
list(c("Y","Z","overall")),
idvar = "G",
v.names = "BIC",
timevar = "Component",
times = c("likelihood","clustering","overall"),
direction = "long")
bics
## G Component BIC
## 1.likelihood 1 likelihood 252.58048
## 2.likelihood 2 likelihood 254.78205
## 3.likelihood 3 likelihood 253.76959
## 4.likelihood 4 likelihood 254.96454
## 1.clustering 1 clustering 127.62681
## 2.clustering 2 clustering 117.73970
## 3.clustering 3 clustering 83.56518
## 4.clustering 4 clustering 85.90003
## 1.overall 1 overall 380.20729
## 2.overall 2 overall 372.52174
## 3.overall 3 overall 337.33477
## 4.overall 4 overall 340.86457
# grafico de BIC vs no.de clusters
with(bics, interaction.plot(G, Component, BIC, type = "b", xlab = "Clusters", ylab = "BIC"))
# G optimo
bestG <- with(bics[bics$Component == "overall",], G[which.min(BIC)])
bestG
## [1] 3
# resumen del modelo para G = 3
summary(fit3)
##
## ==========================
## Summary of model fit
## ==========================
##
## Formula: samplike ~ euclidean(d = 2, G = 3)
## Attribute: edges
## Model: Bernoulli
## MCMC sample of size 4000, draws are 10 iterations apart, after burnin of 10000 iterations.
## Covariate coefficients posterior means:
## Estimate 2.5% 97.5% 2*min(Pr(>0),Pr(<0))
## (Intercept) 2.0019 1.3305 2.802 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Overall BIC: 337.3348
## Likelihood BIC: 253.7696
## Latent space/clustering BIC: 83.56518
##
## Covariate coefficients MKL:
## Estimate
## (Intercept) 1.1707
# grafico densidad variables latentes
plot(fit3, what = "density")
# grafico posiciones latentes
plot(fit3, what = "pmean", print.formula = F, main = "Media post. posiciones latentes")
# grafico posiciones latentes
plot(fit3, pie = TRUE, vertex.cex = 3, print.formula = F, main = "Media post. posiciones latentes")
# asignacion de los clusters
fit3$mcmc.mle$Z.K
## [1] 3 3 2 1 1 1 3 1 1 1 1 3 2 3 3 3 2 2
# validacion cruzada
B <- 10000
n <- igraph::vcount(g)
m <- n*(n-1)/2
set.seed(42)
perm.index <- sample(1:m)
nfolds <- 5
nmiss <- floor(m/nfolds)
Avec <- A[lower.tri(A)]
Avec.pred <- numeric(length(Avec))
set.seed(42)
for (i in 1:nfolds) {
# indices valores perdidos
if (i < nfolds) {
miss.index <- seq(from = (i-1)*nmiss + 1, to = i*nmiss, by = 1)
} else {
miss.index <- ((i-1)*nmiss + 1):m
}
A.miss.index <- perm.index[miss.index]
# conformar matriz de adyacencia con NAs
Avec.temp <- Avec
Avec.temp[A.miss.index] <- NA
Atemp <- matrix(0, n, n)
Atemp[lower.tri(Atemp)] <- Avec.temp
Atemp <- Atemp + t(Atemp)
# ajustar el modelo y predecir
lazlike <- network::as.network.matrix(x = Atemp)
fit <- ergmm(lazlike ~ euclidean(d = 2, G = 0), control = ergmm.control(sample.size = B, burnin = 10000, interval = 10))
# prediccion
pred <- matrix(0, n, n)
for (b in 1:B) {
bet <- fit$sample$beta[b]
for (ii in 2:n) {
for (jj in 1:(ii-1)){
lat <- sqrt(sum((fit$sample$Z[b,ii,] - fit$sample$Z[b,jj,])^2))
pred[ii,jj] <- pred[ii,jj] + expit(bet - lat)/B
}
}
}
pred.vec <- pred[lower.tri(pred)]
Avec.pred[A.miss.index] <- pred.vec[A.miss.index]
# fold
print(i)
}
# curva ROC
suppressMessages(suppressWarnings(library(ROCR)))
pred <- ROCR::prediction(predictions = Avec.pred, labels = Avec)
perf <- ROCR::performance(prediction.obj = pred, measure = "tpr", x.measure = "fpr")
plot(x = perf@x.values[[1]], y = perf@y.values[[1]], type = "l", col = 2, lwd = 3,
xlab = "Tasa de Falsos Positivos", ylab = "Tasa de verdaderos positivos", main = "Curva ROC")
# AUC
perf.auc <- ROCR::performance(prediction.obj = pred, measure = "auc")
perf.auc@y.values[[1]]
## [1] 0.7109378