# Principal components session
## Load the bupa data set from its GitHub URL and preview the first rows
bupa <- read.csv(
  file = "https://raw.githubusercontent.com/VictorGuevaraP/Mineria-de-datos-2019-2/master/bupa.txt",
  sep = ","
)
head(x = bupa)
## V1 V2 V3 V4 V5 V6 V7
## 1 85 92 45 27 31 0 1
## 2 85 64 59 32 23 0 2
## 3 86 54 33 16 54 0 2
## 4 91 78 34 24 36 0 2
## 5 87 70 12 28 10 0 2
## 6 98 55 13 17 17 0 2
# Inspect the structure (dimensions and column types) of the data frame
str(object = bupa)
## 'data.frame': 345 obs. of 7 variables:
## $ V1: int 85 85 86 91 87 98 88 88 92 90 ...
## $ V2: int 92 64 54 78 70 55 62 67 54 60 ...
## $ V3: int 45 59 33 34 12 13 20 21 22 25 ...
## $ V4: int 27 32 16 24 28 17 17 11 20 19 ...
## $ V5: int 31 23 54 36 10 17 9 11 7 5 ...
## $ V6: num 0 0 0 0 0 0 0.5 0.5 0.5 0.5 ...
## $ V7: int 1 2 2 2 2 2 1 1 1 1 ...
# All columns are quantitative
# Descriptive summary of every column
summary(object = bupa)
## V1 V2 V3 V4
## Min. : 65.00 Min. : 23.00 Min. : 4.00 Min. : 5.00
## 1st Qu.: 87.00 1st Qu.: 57.00 1st Qu.: 19.00 1st Qu.:19.00
## Median : 90.00 Median : 67.00 Median : 26.00 Median :23.00
## Mean : 90.16 Mean : 69.87 Mean : 30.41 Mean :24.64
## 3rd Qu.: 93.00 3rd Qu.: 80.00 3rd Qu.: 34.00 3rd Qu.:27.00
## Max. :103.00 Max. :138.00 Max. :155.00 Max. :82.00
## V5 V6 V7
## Min. : 5.00 Min. : 0.000 Min. :1.00
## 1st Qu.: 15.00 1st Qu.: 0.500 1st Qu.:1.00
## Median : 25.00 Median : 3.000 Median :2.00
## Mean : 38.28 Mean : 3.455 Mean :1.58
## 3rd Qu.: 46.00 3rd Qu.: 6.000 3rd Qu.:2.00
## Max. :297.00 Max. :20.000 Max. :2.00
# 1. Correlation check: pairwise correlations between all columns
cor(x = bupa)
## V1 V2 V3 V4 V5 V6
## V1 1.00000000 0.04410300 0.14769505 0.1877652 0.2223145 0.31267960
## V2 0.04410300 1.00000000 0.07620761 0.1460565 0.1331404 0.10079606
## V3 0.14769505 0.07620761 1.00000000 0.7396749 0.5034353 0.20684793
## V4 0.18776515 0.14605655 0.73967487 1.0000000 0.5276259 0.27958777
## V5 0.22231449 0.13314040 0.50343525 0.5276259 1.0000000 0.34122396
## V6 0.31267960 0.10079606 0.20684793 0.2795878 0.3412240 1.00000000
## V7 -0.09107012 -0.09805018 -0.03500879 0.1573558 0.1463925 -0.02204853
## V7
## V1 -0.09107012
## V2 -0.09805018
## V3 -0.03500879
## V4 0.15735580
## V5 0.14639252
## V6 -0.02204853
## V7 1.00000000
library(corrplot)
# Correlogram of the correlation matrix
corrplot(corr = cor(x = bupa))
library(PerformanceAnalytics)
# Correlation chart of the raw columns
chart.Correlation(R = bupa)
# Reading the correlogram (corrplot defaults): blue circles mark positive
# correlations, red circles negative ones, and the larger/darker the circle the
# stronger the correlation; near-white cells mean almost no correlation.
library(psych)
# General test of the correlation matrix against an identity matrix.
# cortest() cannot recover the sample size from a correlation matrix and falls
# back to n = 100 when n1 is missing, so pass the real number of observations.
# NOTE: the transcript recorded below was produced before n1 was supplied
# (its chi-square corresponds to n = 100); re-run to refresh it.
cortest(cor(bupa), n1 = nrow(bupa))
## Tests of correlation matrices
## Call:cortest(R1 = cor(bupa))
## Chi Square value 208.01 with df = 21 with probability < 9.6e-33
# 2. Bartlett's test (determinant of the correlation matrix vs. an identity matrix)
library(rela)
# Take the sample size from the data instead of hard-coding 345, so the test
# stays correct if the input file changes.
cortest.bartlett(cor(bupa), n = nrow(bupa))
## $chisq
## [1] 544.8724
##
## $p.value
## [1] 6.004754e-102
##
## $df
## [1] 21
# 3. Kaiser-Meyer-Olkin (KMO) test of factor adequacy
library(psych)
KMO(r = bupa)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = bupa)
## Overall MSA = 0.64
## MSA for each item =
## V1 V2 V3 V4 V5 V6 V7
## 0.70 0.53 0.59 0.63 0.81 0.73 0.23
## The KMO result supports running the PCA: the overall MSA is 0.64, above 0.5
## Scree plot
scree(rx = bupa)
## According to the scree plot, three components should be retained
# Parallel analysis: another way to decide how many components to keep.
# fa.parallel() receives a correlation matrix here, so it needs n.obs; without
# it the number of subjects is arbitrarily set to 100 (see the warning recorded
# below) instead of the real sample size.
# NOTE: the transcript below comes from the earlier call without n.obs, where
# the analysis suggested 1 factor and 1 component; re-run to refresh it and
# compare against the scree plot.
fa.parallel(cor(bupa), n.obs = nrow(bupa))
## Warning in fa.parallel(cor(bupa)): It seems as if you are using a
## correlation matrix, but have not specified the number of cases. The number
## of subjects is arbitrarily set to be 100
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate =
## rotate, : A loading greater than abs(1) was detected. Examine the loadings
## carefully.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs
## = np.obs, : The estimated weights for the factor scores are probably
## incorrect. Try a different factor extraction method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate =
## rotate, : An ultra-Heywood case was detected. Examine the results carefully
## Parallel analysis suggests that the number of factors = 1 and the number of components = 1
# PCA on centred, unit-variance columns.
# The argument is spelled `scale.` (prcomp's actual name; `scale` only worked
# through partial matching) and TRUE is used instead of the reassignable T.
componentes <- prcomp(bupa, center = TRUE, scale. = TRUE)
# Print the standard deviations and the rotation (loadings) matrix
componentes
## Standard deviations (1, .., p=7):
## [1] 1.5837765 1.0926330 1.0046350 0.9465752 0.8188865 0.7061828 0.4724823
##
## Rotation (n x k) = (7 x 7):
## PC1 PC2 PC3 PC4 PC5 PC6
## V1 0.26093155 0.4869910 0.49039467 -0.02430417 0.67017404 0.04830950
## V2 0.14769977 0.3252950 -0.66587263 -0.62523056 0.15949171 0.06888411
## V3 0.50668951 -0.1526510 -0.23040936 0.41245134 0.03558136 0.19739104
## V4 0.53762897 -0.2217274 -0.14693268 0.13167289 0.08655906 0.36430265
## V5 0.49239652 -0.1143955 0.03814116 -0.10572417 -0.07449247 -0.84970661
## V6 0.34359042 0.3470071 0.37146594 -0.27134023 -0.69137189 0.25772936
## V7 0.06173834 -0.6716080 0.31938586 -0.57986125 0.18200666 0.18115126
## PC7
## V1 0.04700907
## V2 0.08880155
## V3 0.67566973
## V4 -0.69473462
## V5 -0.06539520
## V6 0.07416000
## V7 0.20234232
## There are 7 components because that is the number of columns in the data set
# Variance explained by each component (individual and cumulative)
summary(object = componentes)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 1.5838 1.0926 1.0046 0.9466 0.8189 0.70618 0.47248
## Proportion of Variance 0.3583 0.1706 0.1442 0.1280 0.0958 0.07124 0.03189
## Cumulative Proportion 0.3583 0.5289 0.6731 0.8011 0.8969 0.96811 1.00000
## Plot of the components
plot(x = componentes)
# Rotation matrix: loading of every variable on every component
componentes[["rotation"]]
## PC1 PC2 PC3 PC4 PC5 PC6
## V1 0.26093155 0.4869910 0.49039467 -0.02430417 0.67017404 0.04830950
## V2 0.14769977 0.3252950 -0.66587263 -0.62523056 0.15949171 0.06888411
## V3 0.50668951 -0.1526510 -0.23040936 0.41245134 0.03558136 0.19739104
## V4 0.53762897 -0.2217274 -0.14693268 0.13167289 0.08655906 0.36430265
## V5 0.49239652 -0.1143955 0.03814116 -0.10572417 -0.07449247 -0.84970661
## V6 0.34359042 0.3470071 0.37146594 -0.27134023 -0.69137189 0.25772936
## V7 0.06173834 -0.6716080 0.31938586 -0.57986125 0.18200666 0.18115126
## PC7
## V1 0.04700907
## V2 0.08880155
## V3 0.67566973
## V4 -0.69473462
## V5 -0.06539520
## V6 0.07416000
## V7 0.20234232
## Biplot on the (orthonormal) component axes
biplot(x = componentes, scale = 0)
# Extract the component scores, keeping only the first three components
componentes_prin <- componentes[["x"]][, 1:3]
## First 6 rows
head(x = componentes_prin)
## PC1 PC2 PC3
## [1,] -0.1390785 0.11105334 -2.3448572
## [2,] 0.2907050 -1.94038272 -0.9286607
## [3,] -0.8721346 -1.54263788 0.1152322
## [4,] -0.1580974 -0.70132800 -0.3506241
## [5,] -1.1408941 -1.12133672 -0.3251554
## [6,] -1.0901984 0.03114855 1.5875375
# Export the retained component scores to a CSV file
write.csv(x = componentes_prin, file = "componentes_bupa.csv")
# Working directory, i.e. the folder the relative file path above resolves to
getwd()
## [1] "C:/Users/Dolly/Desktop/Universidad UA/VII ciclo/Data Mining/trabajos en r/algoritmos"
componentes_prin <- as.data.frame(componentes_prin)
# K-means with 3 clusters on the retained component scores.
# kmeans() starts from randomly chosen centres, so the seed is fixed and several
# random starts are tried (nstart) to obtain a reproducible, stable partition.
# NOTE: the cluster numbering and sizes in the transcript below come from an
# unseeded, single-start run; re-run to refresh them.
set.seed(123)
clustering <- kmeans(componentes_prin, centers = 3, nstart = 25)
clustering
## K-means clustering with 3 clusters of sizes 193, 115, 37
##
## Cluster means:
## PC1 PC2 PC3
## 1 -0.74656222 -0.2109173 -0.4741026
## 2 0.07698215 0.4504387 0.8396048
## 3 3.65496112 -0.2998216 -0.1365610
##
## Clustering vector:
## [1] 1 1 1 1 1 2 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1
## [36] 3 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 3 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1
## [71] 1 1 1 1 1 1 3 2 1 2 1 1 1 2 3 2 1 1 1 1 1 1 2 1 1 2 1 1 1 2 1 2 1 1 1
## [106] 2 1 1 1 1 1 1 1 1 3 1 2 1 1 2 2 1 1 1 1 1 2 2 1 1 2 2 3 3 1 1 1 2 2 2
## [141] 2 2 2 2 2 2 2 3 2 2 3 2 2 2 2 2 3 2 2 2 2 2 2 2 2 2 3 3 3 2 1 2 2 2 3
## [176] 2 2 2 3 2 3 2 3 2 2 3 3 2 3 3 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [211] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 3 1 1 2 2 1 1 1 1 1 1 1 1
## [246] 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 2 1 2 1 1 2 1 1 3 2 2
## [281] 1 2 1 1 2 3 2 2 1 2 2 1 2 2 3 2 1 1 2 3 2 2 2 2 2 2 3 2 2 2 2 3 2 2 2
## [316] 3 3 2 2 2 1 2 3 1 2 2 2 2 2 1 3 2 2 3 2 2 1 2 2 3 2 3 3 2 3
##
## Within cluster sum of squares by cluster:
## [1] 424.5773 241.1184 192.1475
## (between_SS / total_SS = 47.1 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss"
## [5] "tot.withinss" "betweenss" "size" "iter"
## [9] "ifault"
# Cluster plot on the first two components, coloured by assigned cluster
plot(
  x = componentes_prin$PC1,
  y = componentes_prin$PC2,
  col = clustering$cluster
)
library(rgl)
# The clusters are easier to appreciate in 3-D
plot3d(
  x = componentes_prin$PC1,
  y = componentes_prin$PC2,
  z = componentes_prin$PC3,
  col = clustering$cluster
)
### Elbow method: total within-cluster sum of squares for k = 2..10
# NOTE(review): this runs on the raw, unscaled bupa columns, whereas the
# k-means fit above uses the component scores -- confirm that is intended.
set.seed(123)  # kmeans() uses random starting centres; make the curve reproducible
valores_k <- 2:10
# vapply() builds the numeric result in one go instead of growing wss inside a
# loop, and nstart reduces the noise caused by unlucky starting centres.
wss <- vapply(
  valores_k,
  function(k) kmeans(bupa, centers = k, nstart = 25)$tot.withinss,
  numeric(1)
)
plot(
  valores_k, wss,
  type = "b",
  xlab = "Number of clusters (k)",
  ylab = "Total within-cluster sum of squares"
)
# The elbow suggests that the optimal number of clusters is 3
# Fuzzy clustering (FANNY)
library(cluster)
library(rgl)
# Three fuzzy clusters on the raw columns, Euclidean distance, no standardization
bupa_agrupa <- fanny(
  x = bupa,
  k = 3,
  diss = FALSE,
  metric = "euclidean",
  stand = FALSE
)
bupa_agrupa
## Fuzzy Clustering object of class 'fanny' :
## m.ship.expon. 2
## objective 3081.451
## tolerance 1e-15
## iterations 80
## converged 1
## maxit 500
## n 345
## Membership coefficients (in %, rounded):
## [,1] [,2] [,3]
## [1,] 36 32 32
## [2,] 33 34 34
## [3,] 38 31 31
## [4,] 33 34 34
## [5,] 21 40 40
## [6,] 20 40 40
## [7,] 18 41 41
## [8,] 19 40 40
## [9,] 20 40 40
## [10,] 19 40 40
## [11,] 20 40 40
## [12,] 18 41 41
## [13,] 31 34 34
## [14,] 20 40 40
## [15,] 19 40 40
## [16,] 22 39 39
## [17,] 18 41 41
## [18,] 21 40 40
## [19,] 40 30 30
## [20,] 32 34 34
## [21,] 22 39 39
## [22,] 29 35 35
## [23,] 20 40 40
## [24,] 18 41 41
## [25,] 46 27 27
## [26,] 21 40 40
## [27,] 22 39 39
## [28,] 18 41 41
## [29,] 30 35 35
## [30,] 24 38 38
## [31,] 22 39 39
## [32,] 32 34 34
## [33,] 26 37 37
## [34,] 37 32 32
## [35,] 18 41 41
## [36,] 38 31 31
## [37,] 29 35 35
## [38,] 24 38 38
## [39,] 29 35 35
## [40,] 27 36 36
## [41,] 41 30 30
## [42,] 35 32 32
## [43,] 29 36 36
## [44,] 23 38 38
## [45,] 18 41 41
## [46,] 17 41 41
## [47,] 20 40 40
## [48,] 36 32 32
## [49,] 19 40 40
## [50,] 30 35 35
## [51,] 19 41 41
## [52,] 18 41 41
## [53,] 44 28 28
## [54,] 30 35 35
## [55,] 23 39 39
## [56,] 19 41 41
## [57,] 23 39 39
## [58,] 20 40 40
## [59,] 28 36 36
## [60,] 18 41 41
## [61,] 27 36 36
## [62,] 19 40 40
## [63,] 23 38 38
## [64,] 20 40 40
## [65,] 23 39 39
## [66,] 20 40 40
## [67,] 21 39 39
## [68,] 21 39 39
## [69,] 23 38 38
## [70,] 17 41 41
## [71,] 42 29 29
## [72,] 29 36 36
## [73,] 19 41 41
## [74,] 22 39 39
## [75,] 18 41 41
## [76,] 33 34 34
## [77,] 41 30 30
## [78,] 24 38 38
## [79,] 21 40 40
## [80,] 27 37 37
## [81,] 44 28 28
## [82,] 37 31 31
## [83,] 32 34 34
## [84,] 19 40 40
## [85,] 36 32 32
## [86,] 19 40 40
## [87,] 26 37 37
## [88,] 19 41 41
## [89,] 21 40 40
## [90,] 23 38 38
## [91,] 19 40 40
## [92,] 21 39 39
## [93,] 33 34 34
## [94,] 18 41 41
## [95,] 26 37 37
## [96,] 22 39 39
## [97,] 41 30 30
## [98,] 43 28 28
## [99,] 26 37 37
## [100,] 21 39 39
## [101,] 19 41 41
## [102,] 42 29 29
## [103,] 18 41 41
## [104,] 23 39 39
## [105,] 27 36 36
## [106,] 26 37 37
## [107,] 29 36 36
## [108,] 32 34 34
## [109,] 22 39 39
## [110,] 40 30 30
## [111,] 29 36 36
## [112,] 26 37 37
## [113,] 23 38 38
## [114,] 22 39 39
## [115,] 40 30 30
## [116,] 20 40 40
## [117,] 24 38 38
## [118,] 18 41 41
## [119,] 27 36 36
## [120,] 18 41 41
## [121,] 38 31 31
## [122,] 31 34 34
## [123,] 33 33 33
## [124,] 21 39 39
## [125,] 33 33 33
## [126,] 19 40 40
## [127,] 37 31 31
## [128,] 44 28 28
## [129,] 22 39 39
## [130,] 20 40 40
## [131,] 23 38 38
## [132,] 17 41 41
## [133,] 45 28 28
## [134,] 39 30 30
## [135,] 18 41 41
## [136,] 23 39 39
## [137,] 18 41 41
## [138,] 18 41 41
## [139,] 43 28 28
## [140,] 20 40 40
## [141,] 24 38 38
## [142,] 21 40 40
## [143,] 18 41 41
## [144,] 30 35 35
## [145,] 21 39 39
## [146,] 42 29 29
## [147,] 38 31 31
## [148,] 45 28 28
## [149,] 17 42 42
## [150,] 18 41 41
## [151,] 44 28 28
## [152,] 33 34 34
## [153,] 20 40 40
## [154,] 25 38 38
## [155,] 44 28 28
## [156,] 41 30 30
## [157,] 42 29 29
## [158,] 42 29 29
## [159,] 42 29 29
## [160,] 18 41 41
## [161,] 43 29 29
## [162,] 21 40 40
## [163,] 24 38 38
## [164,] 36 32 32
## [165,] 19 41 41
## [166,] 22 39 39
## [167,] 45 27 27
## [168,] 43 28 28
## [169,] 45 28 28
## [170,] 41 30 30
## [171,] 33 34 34
## [172,] 42 29 29
## [173,] 17 41 41
## [174,] 27 37 37
## [175,] 43 29 29
## [176,] 41 30 30
## [177,] 44 28 28
## [178,] 18 41 41
## [179,] 39 31 31
## [180,] 37 32 32
## [181,] 43 28 28
## [182,] 42 29 29
## [183,] 38 31 31
## [184,] 20 40 40
## [185,] 44 28 28
## [186,] 42 29 29
## [187,] 44 28 28
## [188,] 40 30 30
## [189,] 44 28 28
## [190,] 39 31 31
## [191,] 19 40 40
## [192,] 18 41 41
## [193,] 36 32 32
## [194,] 27 37 37
## [195,] 21 40 40
## [196,] 23 38 38
## [197,] 22 39 39
## [198,] 21 40 40
## [199,] 23 39 39
## [200,] 27 37 37
## [201,] 18 41 41
## [202,] 20 40 40
## [203,] 43 29 29
## [204,] 25 37 37
## [205,] 42 29 29
## [206,] 30 35 35
## [207,] 19 41 41
## [208,] 25 38 38
## [209,] 26 37 37
## [210,] 23 39 39
## [211,] 33 33 33
## [212,] 25 38 38
## [213,] 34 33 33
## [214,] 31 34 34
## [215,] 18 41 41
## [216,] 30 35 35
## [217,] 24 38 38
## [218,] 42 29 29
## [219,] 22 39 39
## [220,] 38 31 31
## [221,] 33 33 33
## [222,] 22 39 39
## [223,] 18 41 41
## [224,] 24 38 38
## [225,] 19 41 41
## [226,] 18 41 41
## [227,] 36 32 32
## [228,] 46 27 27
## [229,] 41 29 29
## [230,] 43 29 29
## [231,] 22 39 39
## [232,] 22 39 39
## [233,] 38 31 31
## [234,] 30 35 35
## [235,] 42 29 29
## [236,] 24 38 38
## [237,] 20 40 40
## [238,] 22 39 39
## [239,] 19 41 41
## [240,] 19 40 40
## [241,] 20 40 40
## [242,] 19 40 40
## [243,] 21 40 40
## [244,] 30 35 35
## [245,] 20 40 40
## [246,] 21 40 40
## [247,] 17 41 41
## [248,] 21 40 40
## [249,] 19 41 41
## [250,] 46 27 27
## [251,] 34 33 33
## [252,] 43 28 28
## [253,] 18 41 41
## [254,] 40 30 30
## [255,] 38 31 31
## [256,] 21 39 39
## [257,] 23 39 39
## [258,] 18 41 41
## [259,] 19 40 40
## [260,] 24 38 38
## [261,] 44 28 28
## [262,] 27 37 37
## [263,] 24 38 38
## [264,] 23 39 39
## [265,] 46 27 27
## [266,] 30 35 35
## [267,] 24 38 38
## [268,] 34 33 33
## [269,] 29 35 35
## [270,] 17 41 41
## [271,] 20 40 40
## [272,] 23 39 39
## [273,] 20 40 40
## [274,] 18 41 41
## [275,] 18 41 41
## [276,] 29 36 36
## [277,] 45 27 27
## [278,] 40 30 30
## [279,] 26 37 37
## [280,] 37 31 31
## [281,] 20 40 40
## [282,] 22 39 39
## [283,] 19 40 40
## [284,] 19 40 40
## [285,] 19 40 40
## [286,] 40 30 30
## [287,] 23 38 38
## [288,] 24 38 38
## [289,] 38 31 31
## [290,] 32 34 34
## [291,] 28 36 36
## [292,] 22 39 39
## [293,] 18 41 41
## [294,] 43 29 29
## [295,] 44 28 28
## [296,] 36 32 32
## [297,] 25 37 37
## [298,] 44 28 28
## [299,] 18 41 41
## [300,] 38 31 31
## [301,] 25 38 38
## [302,] 21 40 40
## [303,] 22 39 39
## [304,] 33 34 34
## [305,] 39 31 31
## [306,] 24 38 38
## [307,] 44 28 28
## [308,] 26 37 37
## [309,] 19 41 41
## [310,] 35 32 32
## [311,] 44 28 28
## [312,] 46 27 27
## [313,] 34 33 33
## [314,] 17 41 41
## [315,] 27 37 37
## [316,] 39 31 31
## [317,] 41 30 30
## [318,] 24 38 38
## [319,] 36 32 32
## [320,] 39 31 31
## [321,] 33 33 33
## [322,] 30 35 35
## [323,] 38 31 31
## [324,] 21 39 39
## [325,] 21 40 40
## [326,] 35 33 33
## [327,] 34 33 33
## [328,] 24 38 38
## [329,] 36 32 32
## [330,] 31 34 34
## [331,] 38 31 31
## [332,] 39 31 31
## [333,] 28 36 36
## [334,] 43 29 29
## [335,] 38 31 31
## [336,] 31 34 34
## [337,] 34 33 33
## [338,] 38 31 31
## [339,] 25 38 38
## [340,] 42 29 29
## [341,] 34 33 33
## [342,] 39 31 31
## [343,] 46 27 27
## [344,] 22 39 39
## [345,] 43 28 28
## Fuzzyness coefficients:
## dunn_coeff normalized
## 0.34926633 0.02389949
## Closest hard clustering:
## [1] 1 2 1 2 3 3 3 3 3 3 3 3 2 3 3 3 3 3 1 2 3 3 3 3 1 3 3 3 2 3 3 2 2 1 3
## [36] 1 2 3 2 2 1 1 2 3 3 3 3 1 3 2 3 3 1 2 3 3 3 3 2 3 3 3 3 3 2 3 3 3 3 3
## [71] 1 2 3 3 3 2 1 2 3 2 1 1 2 3 1 3 3 3 3 3 3 3 2 3 3 3 1 1 2 3 3 1 3 3 2
## [106] 3 2 2 3 1 3 2 3 3 1 3 3 3 2 3 1 2 2 3 2 3 1 1 3 3 3 3 1 1 3 3 3 3 1 3
## [141] 3 3 3 2 3 1 1 1 3 3 1 2 3 3 1 1 1 1 1 3 1 3 3 1 3 3 1 1 1 1 2 1 3 3 1
## [176] 1 1 3 1 1 1 1 1 3 1 1 1 1 1 1 3 3 1 2 3 3 3 3 3 2 3 3 1 2 1 2 3 2 2 3
## [211] 1 2 1 2 3 2 3 1 3 1 1 3 3 3 3 3 1 1 1 1 3 3 1 2 1 3 3 3 3 3 3 3 3 3 3
## [246] 3 3 3 3 1 1 1 3 1 1 3 3 3 3 2 1 2 3 3 1 2 3 1 2 3 3 3 3 3 3 2 1 1 3 1
## [281] 3 3 3 3 3 1 3 3 1 2 2 3 3 1 1 1 3 1 3 1 3 3 3 2 1 2 1 3 3 1 1 1 1 3 3
## [316] 1 1 3 1 1 2 2 1 3 3 1 1 3 1 2 1 1 2 1 1 2 1 1 3 1 1 1 1 3 1
##
## Available components:
## [1] "membership" "coeff" "memb.exp" "clustering" "k.crisp"
## [6] "objective" "convergence" "diss" "call" "silinfo"
## [11] "data"
### Dunn's partition coefficient is 0.3493 and its normalized version is 0.0239;
### a normalized value this close to 0 indicates a very fuzzy clustering.
# Hard cluster assigned to the first observations
head(x = bupa_agrupa[["clustering"]])
## [1] 1 2 1 2 3 3
plot(x = bupa_agrupa)
### The cluster plot accounts for 52.89% of the variability between the points.
### The silhouette plot gives a value of 0.15
# CLARA clustering
## This method needs the cluster, factoextra and ggplot2 packages
library(cluster)
library(factoextra)
library(ggplot2)
clara_clusterbupa <- clara(
  x = bupa,
  k = 3,
  metric = "manhattan",
  stand = TRUE,
  samples = 50,
  pamLike = TRUE
)
## Print the fit: its medoids and its objective function value (5.2736)
clara_clusterbupa
## Call: clara(x = bupa, k = 3, metric = "manhattan", stand = TRUE, samples = 50, pamLike = TRUE)
## Medoids:
## V1 V2 V3 V4 V5 V6 V7
## [1,] 90 73 34 21 22 2.0 1
## [2,] 90 63 24 24 24 0.5 2
## [3,] 93 84 58 47 62 7.0 2
## Objective function: 5.273574
## Clustering vector: int [1:345] 1 2 2 2 2 2 1 1 1 1 2 1 1 1 1 1 1 1 ...
## Cluster sizes: 131 171 43
## Best sample:
## [1] 18 24 30 37 43 45 60 74 89 97 100 101 110 114 119 120 129
## [18] 135 141 149 152 169 178 179 181 187 195 199 203 220 222 227 230 234
## [35] 250 258 263 266 270 278 284 299 310 314 334 336
##
## Available components:
## [1] "sample" "medoids" "i.med" "clustering" "objective"
## [6] "clusinfo" "diss" "call" "silinfo" "data"
# Plot of the 3 clusters
# Every ggplot layer must be chained with `+`. Previously the `+` after labs()
# was missing, so theme(legend.position = "none") ran as a separate statement:
# it was only printed as a theme object and the legend was never removed.
fviz_cluster(
  object = clara_clusterbupa,
  ellipse.type = "t",
  geom = "point",
  pointsize = 2.5
) +
  theme_bw() +
  labs(title = "Resultados clustering CLARA") +
  theme(legend.position = "none")