library(rio)
# Read the dataset from the CSV file with rio::import().
DATA2 <- import("dataOK_all - dataOK_all.csv")
# Correlation matrix of columns 3 to 7, selected by position.
cor(DATA2[, 3:7])
##                      Código pared1_Ladrillo pared2_Piedra pared3_Adobe
## Código           1.00000000      0.04737659   -0.02849307 -0.015606364
## pared1_Ladrillo  0.04737659      1.00000000    0.65313041  0.353719710
## pared2_Piedra   -0.02849307      0.65313041    1.00000000  0.202919582
## pared3_Adobe    -0.01560636      0.35371971    0.20291958  1.000000000
## pared4_Tapia    -0.14034685     -0.02844127   -0.04928220 -0.004149731
##                 pared4_Tapia
## Código          -0.140346850
## pared1_Ladrillo -0.028441269
## pared2_Piedra   -0.049282201
## pared3_Adobe    -0.004149731
## pared4_Tapia     1.000000000
# Working table: DATA2 without columns 2, 44 and 45 (dropped by position).
DATAof <- DATA2[, -c(2, 44, 45)]
library(cluster)
# Gower dissimilarity between rows. The first two columns of DATAof
# (V1 and Código in the head() preview) look like a row index and an
# identifier code rather than measurements, so they are left out of the
# distance; DATAof itself keeps them for reporting.
# NOTE(review): confirm that V1 / Código are identifiers in the source CSV.
g.dist <- daisy(DATAof[, -c(1, 2)], metric = "gower")
## Warning in daisy(DATAof, metric = "gower"): binary variable(s) 45 treated as
## interval scaled
## para PAM

library(factoextra)
## Cargando paquete requerido: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Plot the gap statistic for PAM, trying up to 10 clusters.
# NOTE(review): with method = "gap_stat", factoextra seems to hand the data to
# cluster::clusGap() without using `diss` -- confirm against the package docs.
fviz_nbclust(
  DATAof,
  FUNcluster = pam,
  method = "gap_stat",
  diss = g.dist,
  k.max = 10,
  verbose = FALSE
)

library(kableExtra)
set.seed(123)
# Partition the observations into 3 clusters with PAM on the Gower
# dissimilarities; cluster.only = FALSE returns the full "pam" object.
res.pam <- pam(g.dist, k = 3, cluster.only = FALSE)

# New column holding each row's PAM cluster label. Use the full component
# name `clustering`; `$cluster` would rely on partial matching of `$`.
DATAof$pam <- res.pam$clustering

# Preview the first 15 rows as a styled table.

head(DATAof, 15) %>%
  kbl() %>%
  kable_styling()
V1 Código pared1_Ladrillo pared2_Piedra pared3_Adobe pared4_Tapia pared5_Quincha pared6_Piedra pared7_Madera pared8_Triplay pared9_Otro pared10_Total techo1_Concreto techo2_Madera techo3_Tejas techo4_Planchas techo5_Caña techo6_Triplay techo7_Paja techo8_Otro techo9_Total piso1_Parquet piso2_Láminas piso3_Losetas piso4_Madera piso5_Cemento piso6_Tierra piso7_Otro piso8_Total agua1_Red agua2_Red_fueraVivienda agua3_Pilón agua4_Camión agua5_Pozo agua6_Manantial agua7_Río agua8_Otro agua9_Vecino agua10_Total elec1_Sí elec2_No elec3_Total Castillo Keiko ganaCastillo covidPositivos covidFallecidos pam
1 102 4633 46 6639 222 2518 127 4484 851 0 19520 2187 294 179 13186 160 106 3408 0 19520 6 19 647 157 7121 11569 1 19520 9429 4392 793 59 1792 270 2648 56 81 19520 13204 6316 19520 25629 10770 1 8126 462 1
2 103 1602 9 2729 240 157 36 2505 30 0 7308 692 75 382 6084 38 5 32 0 7308 5 2 165 132 2917 4087 0 7308 4569 1497 215 0 474 67 388 61 37 7308 6025 1283 7308 8374 5209 1 389 72 1
3 101 3782 22 5881 2476 309 168 1270 91 0 13999 2262 160 3393 8005 50 14 115 0 13999 23 36 1077 240 6189 6434 0 13999 10647 1619 184 49 876 92 488 24 20 13999 12248 1751 13999 15671 10473 1 2174 281 1
4 104 291 7 672 8 386 7 8145 200 0 9716 56 188 177 2036 15 10 7234 0 9716 2 0 20 1523 943 7228 0 9716 1307 867 1003 2 2564 431 3428 80 34 9716 1792 7924 9716 13154 1446 1 3481 111 1
5 105 430 7 5217 6052 346 54 606 45 0 12757 187 43 3071 9343 26 12 75 0 12757 4 0 46 295 1911 10501 0 12757 7172 3097 1112 0 819 132 369 9 47 12757 10886 1871 12757 12606 7840 1 456 88 1
6 106 1546 7 2778 155 720 28 3646 24 0 8904 480 48 2810 5495 15 5 51 0 8904 3 4 264 176 2974 5483 0 8904 5256 1278 154 0 1020 211 948 29 8 8904 6895 2009 8904 7967 5491 1 110 60 1
7 107 4727 35 17199 2964 1836 518 2714 210 0 30203 2595 340 308 26620 196 62 82 0 30203 20 32 940 328 10631 18252 0 30203 14712 8760 1308 117 2502 471 2052 104 177 30203 24395 5808 30203 36540 19222 1 3749 336 1
8 202 15 1 1763 70 7 65 2 18 0 1941 9 57 403 1297 10 17 148 0 1941 0 0 4 24 195 1718 0 1941 1451 42 10 0 230 121 76 2 9 1941 1528 413 1941 2325 1413 1 79 26 1
9 203 97 0 658 3014 7 7 3 0 0 3786 29 12 1146 2341 8 4 246 0 3786 0 0 16 17 314 3439 0 3786 3229 222 40 0 190 61 39 1 4 3786 3089 697 3786 5056 788 1 54 31 1
10 204 215 3 368 1701 4 6 4 1 0 2302 76 8 1893 314 5 3 3 0 2302 5 1 41 12 409 1834 0 2302 1642 444 4 0 124 27 49 6 6 2302 2032 270 2302 2860 827 1 59 21 1
11 205 506 12 3703 2332 27 183 14 41 0 6818 261 46 1365 4554 40 35 517 0 6818 6 8 91 86 1854 4773 0 6818 4930 479 113 0 579 242 456 11 8 6818 5375 1443 6818 7690 3994 1 242 96 1
12 206 1913 3 10846 140 9 15 48 29 0 13003 1543 118 5794 5376 68 60 44 0 13003 14 8 394 20 3212 9355 0 13003 9502 1735 190 4 865 296 283 26 102 13003 10348 2655 13003 18781 8590 1 552 163 1
13 207 107 0 1326 3728 0 20 4 3 0 5188 62 14 3559 1254 6 14 279 0 5188 1 1 15 11 609 4551 0 5188 3437 1014 85 0 274 76 216 28 58 5188 3398 1790 5188 6462 1697 1 56 34 1
14 208 5614 29 4418 11 1097 15 312 3065 0 14561 2884 78 153 3549 4806 3059 32 0 14561 49 19 998 19 6173 7303 0 14561 8800 639 1608 1538 1421 150 275 29 101 14561 11637 2924 14561 11328 15546 0 963 362 2
15 209 43 1 1963 11 0 5 1 1 0 2025 19 4 610 1353 16 1 22 0 2025 0 0 4 4 417 1600 0 2025 1522 90 10 0 194 100 100 3 6 2025 1816 209 2025 2174 1460 1 37 19 1
# Mean of every other column within each PAM cluster.
aggregate(. ~ pam, data = DATAof, FUN = mean)
##   pam        V1   Código pared1_Ladrillo pared2_Piedra pared3_Adobe
## 1   1  90.95425 1050.961        7749.157      169.1176      8523.85
## 2   2 125.11905 1483.524       30052.833      152.1429     10380.24
## 3   3 135.00000 1501.000     1850434.000    10905.0000     51710.00
##   pared4_Tapia pared5_Quincha pared6_Piedra pared7_Madera pared8_Triplay
## 1    2290.3137       335.0196       471.183      1804.092       330.2157
## 2     135.3571      2528.3571       101.381      6049.810      3164.1667
## 3     562.0000      7089.0000      1244.000    197660.000     55594.0000
##   pared9_Otro pared10_Total techo1_Concreto techo2_Madera techo3_Tejas
## 1   0.1372549      21673.08        5404.569      202.5686    3584.8105
## 2   0.4285714      52564.71       20347.452     1258.5238     939.8571
## 3   2.0000000    2175200.00     1616788.000    70951.0000   12324.0000
##   techo4_Planchas techo5_Caña techo6_Triplay techo7_Paja techo8_Otro
## 1        10901.30    220.0915       146.5621   1213.0392   0.1437908
## 2        22164.74   4896.8571      2078.2619    878.5714   0.4523810
## 3       417514.00  21627.0000     33153.0000   2841.0000   2.0000000
##   techo9_Total piso1_Parquet piso2_Láminas piso3_Losetas piso4_Madera
## 1     21673.08      318.1438      221.4379      1509.431      764.817
## 2     52564.71     1061.1429      447.4286      9425.810     2069.095
## 3   2175200.00   298751.0000    91740.0000    609326.000    26720.000
##   piso5_Cemento piso6_Tierra  piso7_Otro piso8_Total  agua1_Red
## 1      7498.399     11360.06   0.7973856    21673.08   12513.87
## 2     25746.024     13811.19   4.0238095    52564.71   37082.90
## 3   1017917.000    130607.00 139.0000000  2175200.00 1690717.00
##   agua2_Red_fueraVivienda agua3_Pilón agua4_Camión agua5_Pozo agua6_Manantial
## 1                3015.111    1301.804     296.0392   2714.137        586.6013
## 2                4129.643    2220.238    3174.1667   2952.286        186.7381
## 3              232583.000   69695.000  146223.0000  23016.000        119.0000
##   agua7_Río agua8_Otro agua9_Vecino agua10_Total   elec1_Sí  elec2_No
## 1  1084.908   66.58824     94.02614     21673.08   17489.09  4183.993
## 2  1978.167  329.59524    510.97619     52564.71   47297.60  5267.119
## 3   497.000 2025.00000  10325.00000   2175200.00 2088460.00 86740.000
##   elec3_Total   Castillo      Keiko ganaCastillo covidPositivos covidFallecidos
## 1    21673.08   31325.35   12335.68            1       1988.575        372.5621
## 2    52564.71   47138.45   70612.79            0       6664.952       1766.7381
## 3  2175200.00 1938262.00 3717920.00            0     389505.000      89708.0000
## PARA JERÁRQUICO

# Gap statistic for hierarchical clustering (agnes), trying up to 10 clusters.
# By this point DATAof already carries cluster-label columns (e.g. `pam`);
# drop them so earlier cluster labels are not fed back in as features.
fviz_nbclust(
  DATAof[, setdiff(names(DATAof), c("pam", "agnes")), drop = FALSE],
  FUNcluster = hcut,
  method = "gap_stat",
  diss = g.dist,
  k.max = 10,
  verbose = FALSE,
  hc_func = "agnes"
)

set.seed(123)
library(factoextra)

# Hierarchical clustering via agnes on the Gower dissimilarities, cut into
# 3 groups.
res.agnes <- hcut(g.dist, k = 3, hc_func = "agnes", hc_method = "ward.D")

# Store each row's hierarchical cluster label next to the data.
DATAof$agnes <- res.agnes$cluster

# Preview the first 15 rows as a styled table.

kable_styling(kbl(head(DATAof, 15)))
V1 Código pared1_Ladrillo pared2_Piedra pared3_Adobe pared4_Tapia pared5_Quincha pared6_Piedra pared7_Madera pared8_Triplay pared9_Otro pared10_Total techo1_Concreto techo2_Madera techo3_Tejas techo4_Planchas techo5_Caña techo6_Triplay techo7_Paja techo8_Otro techo9_Total piso1_Parquet piso2_Láminas piso3_Losetas piso4_Madera piso5_Cemento piso6_Tierra piso7_Otro piso8_Total agua1_Red agua2_Red_fueraVivienda agua3_Pilón agua4_Camión agua5_Pozo agua6_Manantial agua7_Río agua8_Otro agua9_Vecino agua10_Total elec1_Sí elec2_No elec3_Total Castillo Keiko ganaCastillo covidPositivos covidFallecidos pam agnes
1 102 4633 46 6639 222 2518 127 4484 851 0 19520 2187 294 179 13186 160 106 3408 0 19520 6 19 647 157 7121 11569 1 19520 9429 4392 793 59 1792 270 2648 56 81 19520 13204 6316 19520 25629 10770 1 8126 462 1 1
2 103 1602 9 2729 240 157 36 2505 30 0 7308 692 75 382 6084 38 5 32 0 7308 5 2 165 132 2917 4087 0 7308 4569 1497 215 0 474 67 388 61 37 7308 6025 1283 7308 8374 5209 1 389 72 1 1
3 101 3782 22 5881 2476 309 168 1270 91 0 13999 2262 160 3393 8005 50 14 115 0 13999 23 36 1077 240 6189 6434 0 13999 10647 1619 184 49 876 92 488 24 20 13999 12248 1751 13999 15671 10473 1 2174 281 1 1
4 104 291 7 672 8 386 7 8145 200 0 9716 56 188 177 2036 15 10 7234 0 9716 2 0 20 1523 943 7228 0 9716 1307 867 1003 2 2564 431 3428 80 34 9716 1792 7924 9716 13154 1446 1 3481 111 1 1
5 105 430 7 5217 6052 346 54 606 45 0 12757 187 43 3071 9343 26 12 75 0 12757 4 0 46 295 1911 10501 0 12757 7172 3097 1112 0 819 132 369 9 47 12757 10886 1871 12757 12606 7840 1 456 88 1 1
6 106 1546 7 2778 155 720 28 3646 24 0 8904 480 48 2810 5495 15 5 51 0 8904 3 4 264 176 2974 5483 0 8904 5256 1278 154 0 1020 211 948 29 8 8904 6895 2009 8904 7967 5491 1 110 60 1 1
7 107 4727 35 17199 2964 1836 518 2714 210 0 30203 2595 340 308 26620 196 62 82 0 30203 20 32 940 328 10631 18252 0 30203 14712 8760 1308 117 2502 471 2052 104 177 30203 24395 5808 30203 36540 19222 1 3749 336 1 1
8 202 15 1 1763 70 7 65 2 18 0 1941 9 57 403 1297 10 17 148 0 1941 0 0 4 24 195 1718 0 1941 1451 42 10 0 230 121 76 2 9 1941 1528 413 1941 2325 1413 1 79 26 1 1
9 203 97 0 658 3014 7 7 3 0 0 3786 29 12 1146 2341 8 4 246 0 3786 0 0 16 17 314 3439 0 3786 3229 222 40 0 190 61 39 1 4 3786 3089 697 3786 5056 788 1 54 31 1 1
10 204 215 3 368 1701 4 6 4 1 0 2302 76 8 1893 314 5 3 3 0 2302 5 1 41 12 409 1834 0 2302 1642 444 4 0 124 27 49 6 6 2302 2032 270 2302 2860 827 1 59 21 1 1
11 205 506 12 3703 2332 27 183 14 41 0 6818 261 46 1365 4554 40 35 517 0 6818 6 8 91 86 1854 4773 0 6818 4930 479 113 0 579 242 456 11 8 6818 5375 1443 6818 7690 3994 1 242 96 1 1
12 206 1913 3 10846 140 9 15 48 29 0 13003 1543 118 5794 5376 68 60 44 0 13003 14 8 394 20 3212 9355 0 13003 9502 1735 190 4 865 296 283 26 102 13003 10348 2655 13003 18781 8590 1 552 163 1 1
13 207 107 0 1326 3728 0 20 4 3 0 5188 62 14 3559 1254 6 14 279 0 5188 1 1 15 11 609 4551 0 5188 3437 1014 85 0 274 76 216 28 58 5188 3398 1790 5188 6462 1697 1 56 34 1 1
14 208 5614 29 4418 11 1097 15 312 3065 0 14561 2884 78 153 3549 4806 3059 32 0 14561 49 19 998 19 6173 7303 0 14561 8800 639 1608 1538 1421 150 275 29 101 14561 11637 2924 14561 11328 15546 0 963 362 2 2
15 209 43 1 1963 11 0 5 1 1 0 2025 19 4 610 1353 16 1 22 0 2025 0 0 4 4 417 1600 0 2025 1522 90 10 0 194 100 100 3 6 2025 1816 209 2025 2174 1460 1 37 19 1 1
# Draw the dendrogram of the hierarchical solution horizontally, with no title.
fviz_dend(
  res.agnes,
  cex = 0.7,
  horiz = TRUE,
  main = ""
)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
##   Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.