Medidas de Asociación y Distancia


Para el cálculo de algunas medidas se requiere instalar el paquete vegan.

install.packages("vegan")


Indices de Asociación Binarios entre Columnas

yuyos <- read.table("yuyos.txt", T, "\t", row.names = 1)
##    A B C D E F
## 1  1 1 1 0 0 1
## 2  0 1 1 1 0 0
## 3  1 1 1 1 0 0
## 4  1 1 0 1 0 0
## 5  1 0 1 1 0 0
## 6  0 1 1 1 1 0
## 7  1 0 1 1 0 1
## 8  1 1 1 1 0 0
## 9  1 0 1 1 0 0
## 10 1 0 1 1 0 1


a). Miden alejamiento de la Independencia:

\( \bullet \phi \) de Pearson

cor(yuyos)
##          A       B        C        D          E          F
## A  1.00000 -0.2632 -0.13363 -0.05025 -4.082e-01  4.082e-01
## B -0.26318  1.0000  0.16705 -0.16862 -2.417e-01 -2.417e-01
## C -0.13363  0.1671  1.00000  0.17459  2.182e-01 -5.455e-02
## D -0.05025 -0.1686  0.17459  1.00000  1.846e-01 -4.308e-01
## E -0.40825 -0.2417  0.21822  0.18464  1.000e+00 -8.470e-21
## F  0.40825 -0.2417 -0.05455 -0.43082 -8.470e-21  1.000e+00


\( \bullet \) Yule

library(vegan)

designdist(t(yuyos), "(a*d - b*c)/(a*d + b*c)", abcd = T)
##         A       B       C       D       E
## B -0.5000                                
## C -0.3846  0.4634                        
## D -0.1613 -0.4857  0.5200                
## E -0.8065 -0.6000  1.0000  1.0000        
## F  1.0000 -0.6000 -0.1724 -0.8537  0.0000


b). Miden Copresencia:

\( \bullet \) Russel y Rao

designdist(t(yuyos), "J / P")
##      A    B    C    D    E
## B 0.20                    
## C 0.48 0.40               
## D 0.52 0.36 0.76          
## E 0.04 0.04 0.20 0.20     
## F 0.20 0.04 0.16 0.12 0.04


\( \bullet \) Braun-Blanquet

designdist(t(yuyos), "J / pmax(A, B)")


\( \bullet \) Dice

designdist(t(yuyos), "2*J / (A + B)")


\( \bullet \) Ochiai

designdist(t(yuyos), "J / sqrt(A * B)")


\( \bullet \) Kulczynski

designdist(t(yuyos), "(J/A + J/B)/2")


\( \bullet \) Simpson

designdist(t(yuyos), "J / pmin(A, B)")


\( \bullet \) Jaccard

designdist(t(yuyos), "1 - (A + B - 2*J) / (A + B - J)")




Indices de Asociación Binarios entre Filas

\( \bullet \) Simple Matching

designdist(yuyos[1:10, ], "(a + d) / P", abcd = T)
##         1      2      3      4      5      6      7      8      9
## 2  0.5000                                                        
## 3  0.6667 0.8333                                                 
## 4  0.5000 0.6667 0.8333                                          
## 5  0.5000 0.6667 0.8333 0.6667                                   
## 6  0.3333 0.8333 0.6667 0.5000 0.5000                            
## 7  0.6667 0.5000 0.6667 0.5000 0.8333 0.3333                     
## 8  0.6667 0.8333 1.0000 0.8333 0.8333 0.6667 0.6667              
## 9  0.5000 0.6667 0.8333 0.6667 1.0000 0.5000 0.8333 0.8333       
## 10 0.6667 0.5000 0.6667 0.5000 0.8333 0.3333 1.0000 0.6667 0.8333

NOTA: se muestran solo las primeras 10 filas por cuestión de espacio.




Indices de Asociación para Datos Cuantitativos entre Columnas

parasitos <- read.table("parasitos.txt", T, "\t")
##    Edad Ped Pir AC NEM
## 1     5   4  50  0   4
## 2     5   3  50  1   1
## 3     5  17  50  2   0
## 4     5   3  50  1   0
## 5     5  10  15  0   2
## 6     6   6  50  1   0
## 7     6  30  30  0   2
## 8     6  16  50  3   3
## 9     6   9  50  2   3
## 10    6  14  50  1   1


\( \bullet \) Coeficiente de Correlación

cor(parasitos)
##        Edad    Ped    Pir     AC    NEM
## Edad 1.0000 0.5339 0.3332 0.1966 0.5032
## Ped  0.5339 1.0000 0.4387 0.3623 0.4755
## Pir  0.3332 0.4387 1.0000 0.7428 0.4322
## AC   0.1966 0.3623 0.7428 1.0000 0.5011
## NEM  0.5032 0.4755 0.4322 0.5011 1.0000

Gráfico de correlaciones: Ver final del apunte.

\( \bullet \) Coeficiente de Correlación por Rangos de Spearman

cor(parasitos, method = "spearman")
##        Edad    Ped    Pir     AC    NEM
## Edad 1.0000 0.5531 0.4922 0.2297 0.4720
## Ped  0.5531 1.0000 0.4619 0.3101 0.3749
## Pir  0.4922 0.4619 1.0000 0.5579 0.5756
## AC   0.2297 0.3101 0.5579 1.0000 0.4739
## NEM  0.4720 0.3749 0.5756 0.4739 1.0000




Indices de Asociación para Datos Cuantitativos entre Filas

algas <- read.table("algas.txt", T, "\t")
##    Scy Pcol Iri Ulv Cer Gel Gra Cor Pumb Lit
## 1    2    9   4   4   0   0   0   0    0   0
## 2    6    6   4   2   0   0   0   0    0   0
## 3    2    6   4   2   0   0   0   0    0   0
## 4    0    2   7   4   0   0   0   0    0   0
## 5    0    0   8   2   0   2   2   2    2   0
## 6    0    2   9   6   2   0   0   0    0   2
## 7    0    0   9   4   0   0   2   5    2   6
## 8    0    2   8   4   2   2   2   5    2   5
## 9    0    2   8   4   2   2   6   5    2   6
## 10   0    0   5   5   2   2   2   8    2   0
## 11   0    0   0   0   0   2   2   4    0   9
## 12   0    0   0   0   2   2   6   5    2   8


\( \bullet \) Distancia Euclí­dea

dist(algas)
##        1     2     3     4     5     6     7     8     9    10    11
## 2   5.39                                                            
## 3   3.61  4.00                                                      
## 4   7.87  8.06  5.74                                                
## 5  11.00 10.20  8.49  5.00                                          
## 6   9.49 10.05  8.31  4.00  6.71                                    
## 7  13.38 13.04 11.75  8.77  7.35  7.81                              
## 8  11.62 11.75 10.30  8.19  6.78  7.14  3.74                        
## 9  13.34 13.45 12.21 10.49  8.54  9.49  5.39  4.12                  
## 10 12.92 12.73 11.40  9.43  7.62 10.05  8.37  6.93  8.66            
## 11 14.90 14.04 12.85 13.19 12.53 14.07 10.72 10.44 10.86 12.45      
## 12 15.94 15.13 14.04 14.35 12.69 15.03 11.18 10.44  9.38 11.79  5.10


\( \bullet \) Distancia Manhattan

dist(algas, "manhattan")


\( \bullet \) Distancia de Bray-Curtis

library(vegan)

vegdist(algas, "bray")


\( \bullet \) Distancia de Gower

vegdist(algas, "gower")


\( \bullet \) Distancia de Canberra

dist(algas, "canberra")/ncol(algas)


\( \bullet \) Distancia Cuerda de Orloci

dist(algas/apply(algas, 1, function(x) sqrt(x %*% x)))


\( \bullet \) Indice de Czekanowski

designdist(algas, "2*J/(A+B)", terms = "minimum")


\( \bullet \) Indice de Gower

1 - vegdist(algas, "gower")




Medidas de Asociación aplicadas a datos de Composición

cont.estom <- read.table("cont_estom.txt", T, "\t", row.names = 1)
##           Graps Peiso Poliq Pleot Misid Artem Anfip Molus Peces Isopo
## Paralich   38.6  10.0   0.0   4.3  10.0   1.4   0.0   1.4  62.8   0.0
## Mustelus   65.7   1.5  35.8   3.0   1.5   0.0   0.0   0.0   9.0   0.0
## Micropog   44.0  10.4  41.7   0.5   4.3   0.5  13.7   2.8   3.8   1.4
## Porichth    0.0  85.7   0.0   0.0  10.7   0.0   0.0   0.0   0.0   3.6
## Symptery   64.6  18.7  10.4  39.6  20.8  27.1   2.1   2.1   4.2   2.1
## Cynoscion   0.0  21.9   0.0  30.1  14.5  27.3   0.8   0.0   9.8   0.0
## Basilich   37.9   0.0   7.1   0.5  14.8   0.0  18.7  18.1   0.5   0.0
## Convertir los datos a composicionales
cont.estom <- cont.estom/rowSums(cont.estom)
##           Graps Peiso Poliq Pleot Misid Artem Anfip Molus Peces Isopo
## Paralich  0.300 0.078 0.000 0.033 0.078 0.011 0.000 0.011 0.489 0.000
## Mustelus  0.564 0.013 0.307 0.026 0.013 0.000 0.000 0.000 0.077 0.000
## Micropog  0.357 0.084 0.339 0.004 0.035 0.004 0.111 0.023 0.031 0.011
## Porichth  0.000 0.857 0.000 0.000 0.107 0.000 0.000 0.000 0.000 0.036
## Symptery  0.337 0.098 0.054 0.207 0.109 0.141 0.011 0.011 0.022 0.011
## Cynoscion 0.000 0.210 0.000 0.288 0.139 0.261 0.008 0.000 0.094 0.000


\( \bullet \) Indice de Morisita

designdist(cont.estom, "2*J/(A+B)", terms = "quadratic")
##           Paralich Mustelus Micropog Porichth Symptery Cynoscion
## Mustelus   0.55126                                              
## Micropog   0.43520  0.90513                                     
## Porichth   0.13768  0.02128  0.15127                            
## Symptery   0.50214  0.69719  0.66302  0.20159                   
## Cynoscion  0.30203  0.05960  0.11691  0.40095  0.63158          
## Basilich   0.44922  0.72821  0.75500  0.03253  0.69347   0.10334


\( \bullet \) Indice de Czekanowski

designdist(cont.estom, "J", terms = "minimum")


\( \bullet \) Indice de Matusita

designdist(sqrt(cont.estom), "J", terms = "quadratic")


\( \bullet \) Distancia de Matusita

dist(sqrt(cont.estom))
##           Paralich Mustelus Micropog Porichth Symptery Cynoscion
## Mustelus    0.7765                                              
## Micropog    0.8743   0.4799                                     
## Porichth    1.1406   1.3098   1.1398                            
## Symptery    0.7294   0.7043   0.6697   1.0801                   
## Cynoscion   0.8972   1.2120   1.1379   0.9530   0.6977          
## Basilich    0.9400   0.7835   0.5824   1.3211   0.7914    1.2298


\( \bullet \) Distancia de Aitchison

# Controlar que la matriz no contenga 0!!
if (all(cont.estom != 0)) {
    dist(t(apply(cont.estom, 1, function(x) log(x/sqrt(prod(x))))))
} else print("Matriz contiene 0")
## [1] "Matriz contiene 0"


\( \bullet \) Distancia Chi-cuadrado

f.k <- colSums(cont.estom)/sum(cont.estom)

dist(t(apply(cont.estom, 1, function(x) x/sqrt(f.k))))
##           Paralich Mustelus Micropog Porichth Symptery Cynoscion
## Mustelus    1.6826                                              
## Micropog    1.8485   0.7229                                     
## Porichth    2.4517   2.4581   2.2456                            
## Symptery    1.6863   1.3021   1.3583   2.0977                   
## Cynoscion   1.9599   2.0936   2.0294   2.1498   0.9511          
## Basilich    2.0462   1.6508   1.3419   2.5401   1.6051    2.1963




Matriz de correlación sombreada

library(lattice)

sombra <- c(-1, -0.75, -0.45, 0.45, 0.75, 1)
sombra <- c(-1, -0.75, -0.5, 0, 0.5, 0.75, 1)

levelplot(cor(mtcars), xlab = "", ylab = "", panel = function(...) {
    arg <- list(...)
    panel.levelplot(...)
    panel.text(arg$x, arg$y, round(arg$z, 2))
}, at = sombra, colorkey = list(at = sombra, labels = paste(sombra), tick.number = length(sombra)))

plot of chunk cor_tile

library(ggplot2); library(reshape2)

cor <- melt(cor(mtcars))

## Acomoda la diagonal (correlación 1) de arriba hacia abajo
cor$Var2 <- with(cor, factor(Var2, sort(levels(Var1), T), sort(levels(Var1), T)))

ggplot(cor, aes(Var1, Var2, fill=value))+
  geom_tile()+
  geom_text(aes(label=round(value,2)), size=3)+
  scale_fill_gradient2(mid='white', high='blue', low='red', limits=c(-1,1), name='Correlación')+
  geom_text(aes(label=round(value,2)), size=3)+ 
  xlab('')+ 
  ylab('')

plot of chunk cor_tile_ggplot