bupa=read.csv("https://raw.githubusercontent.com/VictorGuevaraP/Mineria-de-datos-2019-2/master/bupa.txt", sep = ",")

01 Prueba de Correlacion

head(bupa)

##   V1 V2 V3 V4 V5 V6 V7
## 1 85 92 45 27 31  0  1
## 2 85 64 59 32 23  0  2
## 3 86 54 33 16 54  0  2
## 4 91 78 34 24 36  0  2
## 5 87 70 12 28 10  0  2
## 6 98 55 13 17 17  0  2

cor(bupa)

##             V1          V2          V3        V4        V5          V6
## V1  1.00000000  0.04410300  0.14769505 0.1877652 0.2223145  0.31267960
## V2  0.04410300  1.00000000  0.07620761 0.1460565 0.1331404  0.10079606
## V3  0.14769505  0.07620761  1.00000000 0.7396749 0.5034353  0.20684793
## V4  0.18776515  0.14605655  0.73967487 1.0000000 0.5276259  0.27958777
## V5  0.22231449  0.13314040  0.50343525 0.5276259 1.0000000  0.34122396
## V6  0.31267960  0.10079606  0.20684793 0.2795878 0.3412240  1.00000000
## V7 -0.09107012 -0.09805018 -0.03500879 0.1573558 0.1463925 -0.02204853
##             V7
## V1 -0.09107012
## V2 -0.09805018
## V3 -0.03500879
## V4  0.15735580
## V5  0.14639252
## V6 -0.02204853
## V7  1.00000000

library(VIM)
library(corrplot)
library(psych)
library(PerformanceAnalytics)

corrplot(cor(bupa))

chart.Correlation(bupa)

## En la primera prueba observamos que si hay correlacion con algunas variables esto es una buena señal para seguir analizando y ver si deberia hacerse un Analisis de componentes Principales

02 Prueba de Correlacion

# PRUEBA GENERAL DE CORRELACIONES
# si la probabilidad es diferente de sero rechazo h sub 0
library(psych)
cortest(cor(bupa))

## Warning in cortest(cor(bupa)): n not specified, 100 used

## Tests of correlation matrices 
## Call:cortest(R1 = cor(bupa))
##  Chi Square value 208.01  with df =  21   with probability < 9.6e-33

#2 Prueba de Bartlet (n = cantidad de datos)
library(rela)
cortest.bartlett(cor(bupa), n=345)

## $chisq
## [1] 544.8724
## 
## $p.value
## [1] 6.004754e-102
## 
## $df
## [1] 21

En la segunda prueba logramos observar que la correlacion es diferente de 0 por lo tanto si se deberia de hacer un Analisis de componentes Principales

Tambien Observamos que el p.value es mayor a 0.5

03 Prueba de Correlacion

#3 Prueba KMO
library(psych)

KMO(bupa)

## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = bupa)
## Overall MSA =  0.64
## MSA for each item = 
##   V1   V2   V3   V4   V5   V6   V7 
## 0.70 0.53 0.59 0.63 0.81 0.73 0.23

Finalmente en la tercera prueba comprobamos que el KMO es de 0.64 por lo tanto al ser mayor que 0.5 reafirmamos que si deberia de hacerse un Analisis de componentes Principales

ANALISIS DE COMPONENTES PRINCIPALES

# Grafico de Sedimentacion
scree(bupa)

#Analisis paralelo
fa.parallel((bupa), fa="pc")

## Parallel analysis suggests that the number of factors =  NA  and the number of components =  2

Segun los graficos nos indica que lo mas favorable es quedarnos con 3 componentes principales

componentes=prcomp(bupa, scale=TRUE, center =T)
componentes

## Standard deviations (1, .., p=7):
## [1] 1.5837765 1.0926330 1.0046350 0.9465752 0.8188865 0.7061828 0.4724823
## 
## Rotation (n x k) = (7 x 7):
##           PC1        PC2         PC3         PC4         PC5         PC6
## V1 0.26093155  0.4869910  0.49039467 -0.02430417  0.67017404  0.04830950
## V2 0.14769977  0.3252950 -0.66587263 -0.62523056  0.15949171  0.06888411
## V3 0.50668951 -0.1526510 -0.23040936  0.41245134  0.03558136  0.19739104
## V4 0.53762897 -0.2217274 -0.14693268  0.13167289  0.08655906  0.36430265
## V5 0.49239652 -0.1143955  0.03814116 -0.10572417 -0.07449247 -0.84970661
## V6 0.34359042  0.3470071  0.37146594 -0.27134023 -0.69137189  0.25772936
## V7 0.06173834 -0.6716080  0.31938586 -0.57986125  0.18200666  0.18115126
##            PC7
## V1  0.04700907
## V2  0.08880155
## V3  0.67566973
## V4 -0.69473462
## V5 -0.06539520
## V6  0.07416000
## V7  0.20234232

summary(componentes)

## Importance of components:
##                           PC1    PC2    PC3    PC4    PC5     PC6     PC7
## Standard deviation     1.5838 1.0926 1.0046 0.9466 0.8189 0.70618 0.47248
## Proportion of Variance 0.3583 0.1706 0.1442 0.1280 0.0958 0.07124 0.03189
## Cumulative Proportion  0.3583 0.5289 0.6731 0.8011 0.8969 0.96811 1.00000

plot(componentes)

biplot(componentes, scale=2)

## Warning in biplot.prcomp(componentes, scale = 2): 'scale' is outside [0, 1]

# Graficamos los componentes y se vizualizan los 3 componentes agrupados

componentes_prin=componentes$x
componentes_prin=componentes_prin[,1:3]
head(componentes_prin)

##             PC1         PC2        PC3
## [1,] -0.1390785  0.11105334 -2.3448572
## [2,]  0.2907050 -1.94038272 -0.9286607
## [3,] -0.8721346 -1.54263788  0.1152322
## [4,] -0.1580974 -0.70132800 -0.3506241
## [5,] -1.1408941 -1.12133672 -0.3251554
## [6,] -1.0901984  0.03114855  1.5875375

Extraemos los componentes agrupados

CLUSTERING Kmeans

componentes_prin=as.data.frame(componentes_prin)

clustering=kmeans(componentes_prin, 3)
clustering

## K-means clustering with 3 clusters of sizes 42, 156, 147
## 
## Cluster means:
##          PC1        PC2         PC3
## 1  3.4273531 -0.2353520 -0.02487045
## 2 -0.5857313 -0.7541420  0.41775506
## 3 -0.3576513  0.8675573 -0.43622605
## 
## Clustering vector:
##   [1] 3 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 3 3 3 3 2 3 3 3 3 3
##  [36] 1 2 2 2 2 2 2 3 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 3 3 2 2 3 2
##  [71] 3 3 3 3 3 3 1 2 2 2 3 2 3 2 1 2 2 2 3 3 3 2 3 3 3 3 2 2 2 2 2 2 3 3 3
## [106] 2 3 3 3 3 2 2 2 2 1 2 2 2 3 2 1 2 3 2 3 2 2 2 2 2 3 3 1 1 2 2 2 2 2 2
## [141] 3 3 3 3 3 3 3 1 3 3 1 3 2 2 2 2 1 3 3 2 3 2 2 2 2 2 1 1 1 3 3 3 3 3 1
## [176] 3 2 2 1 2 1 3 1 2 1 1 1 3 1 1 3 3 2 3 3 3 2 3 3 3 3 2 3 3 3 3 3 3 3 3
## [211] 3 3 3 3 3 3 3 3 2 3 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 3
## [246] 3 3 3 3 2 2 2 2 2 3 3 2 3 3 3 3 3 3 2 2 3 2 3 2 2 2 3 3 3 2 2 2 1 3 2
## [281] 2 2 2 2 2 1 2 2 2 2 2 2 2 2 1 2 2 2 2 1 2 2 2 2 3 2 1 3 3 3 1 1 3 3 3
## [316] 1 1 3 2 3 3 2 1 2 3 1 3 3 3 2 1 3 3 1 3 3 3 1 3 1 3 1 1 3 1
## 
## Within cluster sum of squares by cluster:
## [1] 217.6258 273.8195 306.7057
##  (between_SS / total_SS =  50.8 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"    
## [5] "tot.withinss" "betweenss"    "size"         "iter"        
## [9] "ifault"

plot(componentes_prin$PC1,componentes_prin$PC2, col=clustering$cluster)

CLUSTERING FANNY

library(cluster)
library(factoextra)

bupa <- scale(bupa)
set.seed(111)
pam_clusters <- pam(bupa, k = 3, metric = "manhattan")
pam_clusters

## Medoids:
##       ID          V1         V2         V3          V4         V5
## [1,]  66 -0.26065541  0.3886289 -0.2257958 -0.46137226 -0.4912558
## [2,]  60 -0.03584012 -0.3744108 -0.3282952 -0.06393548 -0.3638822
## [3,] 169  0.63860576  0.7701487  1.4141947  2.22132601  0.6041568
##              V6         V7
## [1,] -0.7355283 -1.1727371
## [2,] -0.8853260  0.8502344
## [3,]  1.0620439  0.8502344
## Clustering vector:
##   [1] 1 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [36] 3 2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 2 2 2
##  [71] 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1
## [106] 1 1 1 1 2 2 2 2 2 3 2 2 2 2 2 3 2 2 2 2 2 2 2 2 2 1 1 3 3 2 2 2 2 2 2
## [141] 1 1 1 1 1 1 1 3 1 1 3 2 2 2 2 2 3 3 3 2 3 2 2 2 2 2 3 3 3 1 1 1 1 1 3
## [176] 1 2 2 3 2 3 1 3 2 3 3 3 3 3 3 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [211] 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 1 1
## [246] 1 1 1 1 2 2 2 2 2 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 2 2 2 3 1 2
## [281] 2 2 2 2 2 3 2 2 2 2 2 2 2 2 3 2 2 2 2 3 2 2 2 2 3 2 3 1 1 1 3 3 1 1 1
## [316] 3 3 1 2 2 2 2 3 2 1 1 1 1 1 2 2 2 2 3 1 1 2 3 2 3 1 3 3 1 3
## Objective function:
##    build     swap 
## 3.823487 3.818007 
## 
## Available components:
##  [1] "medoids"    "id.med"     "clustering" "objective"  "isolation" 
##  [6] "clusinfo"   "silinfo"    "diss"       "call"       "data"

# FINALMENTE PODEMOS OBSERVAR LOS 3 CLUESTERING CREADOS POR EL METODO PAM.

fviz_cluster(object = pam_clusters, data = bupa) +
  theme_bw() +
  labs(title = "Resultados Cluestering PAM") +
  theme(legend.position =  "none")

E3

Yucra

14 de noviembre de 2019