Aulas 5.3 à 5.6

Cluster

Este material está disponível em: http://rpubs.com/leonardoreffatti.

Leitura de dados, resumo dos dados e plot de um Cluster simples.

Passo 1: Lendo os dados, resumindo os dados e carregando os pacotes

# Point R at the course folder and load the insect data set.
setwd("C:/R/Curso do R/MODULO_5.1")
# `header = TRUE` is spelled out in full: `h` relies on partial argument
# matching and `T` is an ordinary variable that can be reassigned.
# `stringsAsFactors = TRUE` keeps the text columns (UA, Ambiente, Gado) as
# factors, matching the summary()/str() output shown in this material, also on
# R >= 4.0 where the default became FALSE.
dados <- read.table("insetos.txt", header = TRUE, stringsAsFactors = TRUE)
# Preview the first rows of the data frame.
head(dados)
##      UA      Ambiente     Gado Temperatura Cobertura  Luz Flores sp.1 sp.2
## 1 área1 Mata_Primária Presente        21.8        73 53.5     14    4    0
## 2 área2 Mata_Primária  Ausente        21.3        62 61.0     28    5    0
## 3 área3 Mata_Primária Presente        23.8        57 71.5     17    1    0
## 4 área4 Mata_Primária Presente        23.2        49 69.5     29    3    0
## 5 área5 Mata_Primária Presente        21.5        59 61.5     15    5    0
## 6 área6 Mata_Primária  Ausente        21.7        78 53.0     24    1    0
##   sp.3 sp.4 sp.5 sp.6 sp.7 sp.8 sp.9 sp.10 sp.11 sp.12 sp.13 sp.14 sp.15
## 1    7    2    0    0    3    2    1     1     0     1     3     3     0
## 2    3    7    2    5    2    4    2     3     0     5     6     4     0
## 3    2    0    2    2    3    3    0     5     0     0     0     2     0
## 4    6    4    1   14    2    5    0     2     4     2     3    10     0
## 5    7    2    1    4    1    2    0     1     0     0     3     5     1
## 6    2    4    0    3    4    3    0     1     2     2     2     2     0
##   sp.16 sp.17 sp.18 sp.19 sp.20 sp.21 sp.22 sp.23 sp.24 sp.25 sp.26 sp.27
## 1     3     0     0     0     0     0     1     1     1     0     2     0
## 2     3     0     1     0     3     1     2     0     1     1     2     0
## 3     0     0     2     2     0     3     1     1     0     0     1     0
## 4     2     1     3     2     0     4     2     2     2     2     0     0
## 5     0     0     1     3     0     2     2     1     1     0     1     0
## 6     0     0     3     0     4     1     2     1     2     1     1     0
##   sp.28 sp.29 sp.30 sp.31 sp.32 sp.33 sp.34
## 1     1     0     0     0     1     0     0
## 2     0     0     0     5     1     0     0
## 3     0     0     0     0     1     0     0
## 4     0     0     0     0     0     0     0
## 5     0     1     0     0     1     1     1
## 6     0     0     0     0     1     0     0
# Put the columns of `dados` on the search path, then print per-column
# summary statistics.
# NOTE(review): no code visible in this section reads the attached columns
# directly (everything indexes `dados`); confirm attach() is still needed.
attach(dados)
summary(dados)
##        UA                Ambiente        Gado     Temperatura   
##  área1  : 1   Mata_Primária  :17   Ausente :15   Min.   :19.70  
##  área10 : 1   Mata_Secundária:13   Presente:15   1st Qu.:21.55  
##  área11 : 1                                      Median :23.55  
##  área12 : 1                                      Mean   :24.59  
##  área13 : 1                                      3rd Qu.:27.18  
##  área14 : 1                                      Max.   :31.20  
##  (Other):24                                                     
##    Cobertura          Luz            Flores           sp.1      
##  Min.   :27.00   Min.   :53.00   Min.   : 5.00   Min.   :0.000  
##  1st Qu.:42.75   1st Qu.:61.12   1st Qu.:20.25   1st Qu.:1.250  
##  Median :50.50   Median :69.00   Median :26.50   Median :4.000  
##  Mean   :53.37   Mean   :67.95   Mean   :26.67   Mean   :3.433  
##  3rd Qu.:64.00   3rd Qu.:74.00   3rd Qu.:35.75   3rd Qu.:5.000  
##  Max.   :78.00   Max.   :84.00   Max.   :47.00   Max.   :6.000  
##                                                                 
##       sp.2           sp.3         sp.4           sp.5          sp.6      
##  Min.   :0.00   Min.   : 1   Min.   :0.00   Min.   :0.0   Min.   : 0.00  
##  1st Qu.:0.00   1st Qu.: 3   1st Qu.:2.00   1st Qu.:0.0   1st Qu.: 3.25  
##  Median :0.50   Median : 5   Median :4.00   Median :1.5   Median : 7.00  
##  Mean   :1.10   Mean   : 5   Mean   :3.80   Mean   :1.8   Mean   : 7.60  
##  3rd Qu.:1.75   3rd Qu.: 7   3rd Qu.:5.75   3rd Qu.:3.0   3rd Qu.:11.25  
##  Max.   :5.00   Max.   :10   Max.   :8.00   Max.   :5.0   Max.   :20.00  
##                                                                          
##       sp.7            sp.8          sp.9           sp.10      
##  Min.   :0.000   Min.   :0.0   Min.   :0.000   Min.   :0.000  
##  1st Qu.:1.000   1st Qu.:2.0   1st Qu.:0.000   1st Qu.:0.250  
##  Median :2.000   Median :3.0   Median :2.000   Median :1.000  
##  Mean   :2.133   Mean   :3.3   Mean   :1.833   Mean   :1.667  
##  3rd Qu.:3.000   3rd Qu.:5.0   3rd Qu.:2.750   3rd Qu.:2.750  
##  Max.   :6.000   Max.   :7.0   Max.   :6.000   Max.   :6.000  
##                                                               
##      sp.11            sp.12           sp.13           sp.14      
##  Min.   : 0.000   Min.   :0.000   Min.   :0.000   Min.   : 0.00  
##  1st Qu.: 0.000   1st Qu.:0.250   1st Qu.:2.000   1st Qu.: 4.00  
##  Median : 2.000   Median :2.000   Median :3.000   Median : 6.00  
##  Mean   : 2.967   Mean   :2.367   Mean   :3.167   Mean   : 6.10  
##  3rd Qu.: 5.750   3rd Qu.:3.750   3rd Qu.:4.750   3rd Qu.: 8.75  
##  Max.   :11.000   Max.   :9.000   Max.   :7.000   Max.   :13.00  
##                                                                  
##      sp.15         sp.16            sp.17            sp.18    
##  Min.   :0.0   Min.   :0.0000   Min.   :0.0000   Min.   :0.0  
##  1st Qu.:0.0   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0  
##  Median :0.0   Median :0.0000   Median :0.0000   Median :0.0  
##  Mean   :1.2   Mean   :0.8667   Mean   :0.9667   Mean   :0.9  
##  3rd Qu.:2.0   3rd Qu.:1.7500   3rd Qu.:1.0000   3rd Qu.:2.0  
##  Max.   :5.0   Max.   :4.0000   Max.   :7.0000   Max.   :3.0  
##                                                               
##      sp.19            sp.20         sp.21           sp.22      
##  Min.   :0.0000   Min.   :0.0   Min.   :0.000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:0.0   1st Qu.:2.000   1st Qu.:1.000  
##  Median :0.0000   Median :0.0   Median :3.000   Median :2.000  
##  Mean   :0.6667   Mean   :0.6   Mean   :2.467   Mean   :1.833  
##  3rd Qu.:1.0000   3rd Qu.:0.0   3rd Qu.:3.000   3rd Qu.:2.000  
##  Max.   :4.0000   Max.   :4.0   Max.   :4.000   Max.   :3.000  
##                                                                
##      sp.23            sp.24            sp.25       sp.26       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0   Min.   :0.0000  
##  1st Qu.:1.0000   1st Qu.:0.0000   1st Qu.:1   1st Qu.:0.0000  
##  Median :1.0000   Median :0.0000   Median :1   Median :0.5000  
##  Mean   :0.9667   Mean   :0.5333   Mean   :1   Mean   :0.6333  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1   3rd Qu.:1.0000  
##  Max.   :2.0000   Max.   :2.0000   Max.   :2   Max.   :2.0000  
##                                                                
##      sp.27            sp.28            sp.29            sp.30    
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.0  
##  Mean   :0.1667   Mean   :0.2667   Mean   :0.2333   Mean   :0.1  
##  3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0  
##  Max.   :3.0000   Max.   :4.0000   Max.   :3.0000   Max.   :2.0  
##                                                                  
##      sp.31            sp.32            sp.33         sp.34       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0   1st Qu.:0.0000  
##  Median :0.0000   Median :1.0000   Median :0.0   Median :0.0000  
##  Mean   :0.2667   Mean   :0.6667   Mean   :0.2   Mean   :0.1333  
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:0.0   3rd Qu.:0.0000  
##  Max.   :5.0000   Max.   :1.0000   Max.   :1.0   Max.   :1.0000  
## 
# Show the structure of the data frame: dimensions, plus the type and first
# values of every column.
str(dados)
## 'data.frame':    30 obs. of  41 variables:
##  $ UA         : Factor w/ 30 levels "área1","área10",..: 1 12 23 25 26 27 28 29 30 2 ...
##  $ Ambiente   : Factor w/ 2 levels "Mata_Primária",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Gado       : Factor w/ 2 levels "Ausente","Presente": 2 1 2 2 2 1 1 1 2 2 ...
##  $ Temperatura: num  21.8 21.3 23.8 23.2 21.5 21.7 20.3 19.7 24.7 20.2 ...
##  $ Cobertura  : int  73 62 57 49 59 78 64 74 48 75 ...
##  $ Luz        : num  53.5 61 71.5 69.5 61.5 53 58 56 71 60.5 ...
##  $ Flores     : int  14 28 17 29 15 24 21 29 20 5 ...
##  $ sp.1       : int  4 5 1 3 5 1 1 0 6 3 ...
##  $ sp.2       : int  0 0 0 0 0 0 3 1 4 0 ...
##  $ sp.3       : int  7 3 2 6 7 2 2 1 8 3 ...
##  $ sp.4       : int  2 7 0 4 2 4 5 3 3 1 ...
##  $ sp.5       : int  0 2 2 1 1 0 3 4 0 0 ...
##  $ sp.6       : int  0 5 2 14 4 3 3 9 8 0 ...
##  $ sp.7       : int  3 2 3 2 1 4 3 2 1 3 ...
##  $ sp.8       : int  2 4 3 5 2 3 0 0 1 0 ...
##  $ sp.9       : int  1 2 0 0 0 0 2 0 1 0 ...
##  $ sp.10      : int  1 3 5 2 1 1 0 0 1 1 ...
##  $ sp.11      : int  0 0 0 4 0 2 0 2 0 0 ...
##  $ sp.12      : int  1 5 0 2 0 2 4 5 0 0 ...
##  $ sp.13      : int  3 6 0 3 3 2 3 1 4 2 ...
##  $ sp.14      : int  3 4 2 10 5 2 2 5 8 0 ...
##  $ sp.15      : int  0 0 0 0 1 0 0 0 1 0 ...
##  $ sp.16      : int  3 3 0 2 0 0 3 0 1 0 ...
##  $ sp.17      : int  0 0 0 1 0 0 0 0 0 0 ...
##  $ sp.18      : int  0 1 2 3 1 3 0 0 2 0 ...
##  $ sp.19      : int  0 0 2 2 3 0 0 2 0 1 ...
##  $ sp.20      : int  0 3 0 0 0 4 0 2 0 0 ...
##  $ sp.21      : int  0 1 3 4 2 1 3 3 3 2 ...
##  $ sp.22      : int  1 2 1 2 2 2 1 2 2 2 ...
##  $ sp.23      : int  1 0 1 2 1 1 2 1 2 1 ...
##  $ sp.24      : int  1 1 0 2 1 2 0 1 1 1 ...
##  $ sp.25      : int  0 1 0 2 0 1 1 0 0 1 ...
##  $ sp.26      : int  2 2 1 0 1 1 2 1 1 1 ...
##  $ sp.27      : int  0 0 0 0 0 0 3 0 0 0 ...
##  $ sp.28      : int  1 0 0 0 0 0 0 0 0 2 ...
##  $ sp.29      : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ sp.30      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ sp.31      : int  0 5 0 0 0 0 1 0 0 0 ...
##  $ sp.32      : int  1 1 1 0 1 1 1 0 1 1 ...
##  $ sp.33      : int  0 0 0 0 1 0 0 0 1 0 ...
##  $ sp.34      : int  0 0 0 0 1 0 0 0 0 0 ...
library(permute)
library(lattice)
library(vegan)
## This is vegan 2.5-2

Passo 2: Padronizar os valores das variáveis ambientais quando as escalas das unidades das variáveis são diferentes.

# Standardization (z-score): (each value - mean) / standard deviation.
# Every variable ends up varying on the same magnitude (mean = 0, sd = 1),
# and the differences between observations are kept after standardization.
# The environmental variables are selected by name rather than by position,
# so the selection does not depend on how many columns precede them in
# `dados` (e.g. whether UA was read as a column or as row names).
var.amb <- dados[, c("Temperatura", "Cobertura", "Luz", "Flores")]
var.amb.pad <- decostand(var.amb, method = "standardize")
# First two environmental variables on their original scales.
plot(var.amb[, 1:2])

# The same two variables after standardization.
plot(var.amb.pad[, 1:2])

Passo 3: Criando a Matriz de distâncias pelo método euclidiano

# Euclidean distance matrix between the rows (sampling units) of the
# standardized environmental variables. The method name is written in full
# instead of relying on partial matching of "euclid".
dist.amb <- dist(var.amb.pad, method = "euclidean")

Passo 4: Criando o Cluster

# Hierarchical clustering of the distance matrix.
# Average linkage (method = "average") is the UPGMA algorithm.
cluster.amb <- hclust(d = dist.amb, method = "average")
# Draw the dendrogram with the default layout.
plot(x = cluster.amb)

Passo 5: Customizando o Cluster - Definindo altura de corte de grupos

# Recompute the UPGMA tree (average linkage) from the distance matrix.
cluster.amb <- hclust(d = dist.amb, method = "average")
# A negative `hang` makes all leaf labels hang down from height 0.
plot(x = cluster.amb, hang = -1)
# Box the groups obtained by cutting the tree at a given distance (height 2).
rect.hclust(tree = cluster.amb, h = 2)

Passo 6: Customizando o Cluster - Definindo número de grupos e listando os agrupamentos formados

# Recompute the UPGMA tree (average linkage) and draw it with the leaf
# labels hanging down from height 0.
cluster.amb <- hclust(d = dist.amb, method = "average")
plot(x = cluster.amb, hang = -1)
# Instead of a cut height, request a fixed number of groups (k = 3).
rect.hclust(tree = cluster.amb, k = 3)

# Group membership of each observation for the same 3-group cut.
grupos <- cutree(tree = cluster.amb, k = 3)
print(grupos)
##  [1] 1 1 2 3 1 1 1 1 2 1 1 2 2 1 1 2 1 3 3 3 3 3 3 3 3 3 3 3 3 3

Passo 7: Customizando o Cluster - Colocando o Cluster na Horizontal

# Recompute the UPGMA tree (average linkage) from the distance matrix.
cluster.amb <- hclust(d = dist.amb, method = "average")
# Convert to a dendrogram object so it can be drawn horizontally.
plot(x = as.dendrogram(object = cluster.amb), horiz = TRUE)

Passo 8: Customizando o Cluster - Identificando as Unidades Amostrais no Cluster

# Put the sampling-unit (UA) names on the dendrogram: re-read the data using
# the first column (UA) as row names instead of as a regular column.
setwd("C:/R/Curso do R/MODULO_5.1")
# `header = TRUE` is spelled out in full (`h = T` relies on partial argument
# matching and on the reassignable `T`). `stringsAsFactors = TRUE` keeps the
# text columns as factors on R >= 4.0, as in the R version used to produce
# the outputs shown in this material.
dados <- read.table("insetos.txt", header = TRUE, row.names = 1,
                    stringsAsFactors = TRUE)
attach(dados)
## The following objects are masked from dados (pos = 6):
## 
##     Ambiente, Cobertura, Flores, Gado, Luz, sp.1, sp.10, sp.11,
##     sp.12, sp.13, sp.14, sp.15, sp.16, sp.17, sp.18, sp.19, sp.2,
##     sp.20, sp.21, sp.22, sp.23, sp.24, sp.25, sp.26, sp.27, sp.28,
##     sp.29, sp.3, sp.30, sp.31, sp.32, sp.33, sp.34, sp.4, sp.5,
##     sp.6, sp.7, sp.8, sp.9, Temperatura
# Rebuild the whole pipeline on the data frame whose row names are the UAs.
# The environmental variables are selected by name: with UA moved to the row
# names every column position shifts by one, so positional indices are
# fragile here.
var.amb <- dados[, c("Temperatura", "Cobertura", "Luz", "Flores")]
# z-score standardization (mean = 0, sd = 1 per variable).
var.amb.pad <- decostand(var.amb, method = "standardize")
# Euclidean distances between sampling units; method name written in full
# rather than relying on partial matching.
dist.amb <- dist(var.amb.pad, method = "euclidean")
# Average linkage = UPGMA.
cluster.amb <- hclust(dist.amb, method = "average")
# Horizontal dendrogram; the leaves are now labelled with the row names.
plot(as.dendrogram(cluster.amb), horiz = TRUE)