library(rio)
## Warning: package 'rio' was built under R version 4.4.1
data = import("dataOK_all - dataOK_all.csv")
str(data)
## 'data.frame': 196 obs. of 50 variables:
## $ V1 : int 1 2 3 4 5 6 7 8 9 10 ...
## $ key : chr "AMAZONAS+BAGUA" "AMAZONAS+BONGARA" "AMAZONAS+CHACHAPOYAS" "AMAZONAS+CONDORCANQUI" ...
## $ Código : int 102 103 101 104 105 106 107 202 203 204 ...
## $ pared1_Ladrillo : int 4633 1602 3782 291 430 1546 4727 15 97 215 ...
## $ pared2_Piedra : int 46 9 22 7 7 7 35 1 0 3 ...
## $ pared3_Adobe : int 6639 2729 5881 672 5217 2778 17199 1763 658 368 ...
## $ pared4_Tapia : int 222 240 2476 8 6052 155 2964 70 3014 1701 ...
## $ pared5_Quincha : int 2518 157 309 386 346 720 1836 7 7 4 ...
## $ pared6_Piedra : int 127 36 168 7 54 28 518 65 7 6 ...
## $ pared7_Madera : int 4484 2505 1270 8145 606 3646 2714 2 3 4 ...
## $ pared8_Triplay : int 851 30 91 200 45 24 210 18 0 1 ...
## $ pared9_Otro : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pared10_Total : int 19520 7308 13999 9716 12757 8904 30203 1941 3786 2302 ...
## $ techo1_Concreto : int 2187 692 2262 56 187 480 2595 9 29 76 ...
## $ techo2_Madera : int 294 75 160 188 43 48 340 57 12 8 ...
## $ techo3_Tejas : int 179 382 3393 177 3071 2810 308 403 1146 1893 ...
## $ techo4_Planchas : int 13186 6084 8005 2036 9343 5495 26620 1297 2341 314 ...
## $ techo5_Caña : int 160 38 50 15 26 15 196 10 8 5 ...
## $ techo6_Triplay : int 106 5 14 10 12 5 62 17 4 3 ...
## $ techo7_Paja : int 3408 32 115 7234 75 51 82 148 246 3 ...
## $ techo8_Otro : int 0 0 0 0 0 0 0 0 0 0 ...
## $ techo9_Total : int 19520 7308 13999 9716 12757 8904 30203 1941 3786 2302 ...
## $ piso1_Parquet : int 6 5 23 2 4 3 20 0 0 5 ...
## $ piso2_Láminas : int 19 2 36 0 0 4 32 0 0 1 ...
## $ piso3_Losetas : int 647 165 1077 20 46 264 940 4 16 41 ...
## $ piso4_Madera : int 157 132 240 1523 295 176 328 24 17 12 ...
## $ piso5_Cemento : int 7121 2917 6189 943 1911 2974 10631 195 314 409 ...
## $ piso6_Tierra : int 11569 4087 6434 7228 10501 5483 18252 1718 3439 1834 ...
## $ piso7_Otro : int 1 0 0 0 0 0 0 0 0 0 ...
## $ piso8_Total : int 19520 7308 13999 9716 12757 8904 30203 1941 3786 2302 ...
## $ agua1_Red : int 9429 4569 10647 1307 7172 5256 14712 1451 3229 1642 ...
## $ agua2_Red_fueraVivienda: int 4392 1497 1619 867 3097 1278 8760 42 222 444 ...
## $ agua3_Pilón : int 793 215 184 1003 1112 154 1308 10 40 4 ...
## $ agua4_Camión : int 59 0 49 2 0 0 117 0 0 0 ...
## $ agua5_Pozo : int 1792 474 876 2564 819 1020 2502 230 190 124 ...
## $ agua6_Manantial : int 270 67 92 431 132 211 471 121 61 27 ...
## $ agua7_Río : int 2648 388 488 3428 369 948 2052 76 39 49 ...
## $ agua8_Otro : int 56 61 24 80 9 29 104 2 1 6 ...
## $ agua9_Vecino : int 81 37 20 34 47 8 177 9 4 6 ...
## $ agua10_Total : int 19520 7308 13999 9716 12757 8904 30203 1941 3786 2302 ...
## $ elec1_Sí : int 13204 6025 12248 1792 10886 6895 24395 1528 3089 2032 ...
## $ elec2_No : int 6316 1283 1751 7924 1871 2009 5808 413 697 270 ...
## $ elec3_Total : int 19520 7308 13999 9716 12757 8904 30203 1941 3786 2302 ...
## $ departamento : chr "AMAZONAS" "AMAZONAS" "AMAZONAS" "AMAZONAS" ...
## $ provincia : chr "BAGUA" "BONGARA" "CHACHAPOYAS" "CONDORCANQUI" ...
## $ Castillo : int 25629 8374 15671 13154 12606 7967 36540 2325 5056 2860 ...
## $ Keiko : int 10770 5209 10473 1446 7840 5491 19222 1413 788 827 ...
## $ ganaCastillo : int 1 1 1 1 1 1 1 1 1 1 ...
## $ covidPositivos : int 8126 389 2174 3481 456 110 3749 79 54 59 ...
## $ covidFallecidos : int 462 72 281 111 88 60 336 26 31 21 ...
library(cluster)
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
datos_filtrados <- data %>%
filter(!(departamento == "LIMA" & provincia == "LIMA"))
datos_filtrados <- datos_filtrados %>%
mutate(
pct_agua_red = agua1_Red / agua10_Total,
razon_keiko_castillo = Keiko / Castillo,
tasa_fallecidos = (covidFallecidos / covidPositivos) * 1000
) %>%
filter(!is.na(tasa_fallecidos) & is.finite(tasa_fallecidos))
# Seleccionar variables para clustering
cluster_data <- datos_filtrados %>%
select(pct_agua_red, razon_keiko_castillo, tasa_fallecidos)
# Normalizar los datos
scaled_data <- scale(cluster_data)
summary(scaled_data)
## pct_agua_red razon_keiko_castillo tasa_fallecidos
## Min. :-2.8660 Min. :-1.0445 Min. :-1.3292
## 1st Qu.:-0.5575 1st Qu.:-0.7744 1st Qu.:-0.5957
## Median : 0.1402 Median :-0.2902 Median :-0.1675
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.8012 3rd Qu.: 0.4558 3rd Qu.: 0.2724
## Max. : 1.6271 Max. : 3.8998 Max. : 6.8136
CLUSTER POR JERARQUIA AGLOMERATIVA:
agnes_res <- agnes(scaled_data, method = "ward")
groups_agnes <- cutree(agnes_res, k = 3) # Asumiendo 3 clusters
summary(agnes_res)
## Object of class 'agnes' from call:
## agnes(x = scaled_data, method = "ward")
## Agglomerative coefficient: 0.9749954
## Order of objects:
## [1] 1 146 7 194 184 95 137 175 33 82 75 185 76 84 60 148 62 145
## [19] 20 57 40 89 59 83 88 66 31 69 70 64 168 169 23 26 48 24
## [37] 45 188 29 39 43 52 46 85 47 90 87 81 91 172 170 153 4 97
## [55] 138 96 151 171 98 162 164 163 167 166 173 86 154 117 110 144 152 192
## [73] 30 78 165 155 34 93 56 79 73 71 77 174 116 139 142 143 140 195
## [91] 2 5 92 120 178 179 181 61 147 176 21 101 37 104 118 122 54 94
## [109] 133 121 193 3 177 182 180 35 55 28 49 105 187 44 150 186 16 149
## [127] 74 38 41 50 107 6 58 136 13 32 53 63 9 17 68 80 72 8
## [145] 106 124 36 10 65 11 12 112 109 27 15 99 111 108 14 159 113 119
## [163] 115 161 125 128 67 123 158 190 18 19 102 126 131 127 141 130 134 132
## [181] 157 25 100 103 183 160 189 191 22 42 135 156 51 114 129
## Merge:
## [,1] [,2]
## [1,] -12 -112
## [2,] -69 -70
## [3,] -47 -90
## [4,] -16 -149
## [5,] -26 -48
## [6,] 3 -87
## [7,] -21 -101
## [8,] -162 -164
## [9,] -29 -39
## [10,] -91 -172
## [11,] -40 -89
## [12,] -163 -167
## [13,] -75 -185
## [14,] -10 -65
## [15,] -76 -84
## [16,] 7 -37
## [17,] -33 -82
## [18,] -46 -85
## [19,] -20 -57
## [20,] -126 -131
## [21,] -100 -103
## [22,] -130 -134
## [23,] -23 5
## [24,] -177 -182
## [25,] -7 -194
## [26,] -106 -124
## [27,] -54 -94
## [28,] -60 -148
## [29,] -98 8
## [30,] -168 -169
## [31,] -68 -80
## [32,] 24 -180
## [33,] -105 -187
## [34,] -25 21
## [35,] -118 -122
## [36,] -120 -178
## [37,] -179 -181
## [38,] -59 -83
## [39,] -45 -188
## [40,] -67 -123
## [41,] -5 -92
## [42,] -132 -157
## [43,] 38 -88
## [44,] -43 -52
## [45,] -115 -161
## [46,] 1 -109
## [47,] -38 -41
## [48,] 25 -184
## [49,] -127 -141
## [50,] -1 -146
## [51,] -44 -150
## [52,] 10 -170
## [53,] -19 -102
## [54,] -28 -49
## [55,] 12 -166
## [56,] -9 -17
## [57,] 31 -72
## [58,] -35 -55
## [59,] 28 -62
## [60,] -56 -79
## [61,] -61 -147
## [62,] 51 -186
## [63,] -8 26
## [64,] 4 -74
## [65,] 6 -81
## [66,] -104 35
## [67,] -137 -175
## [68,] 23 -24
## [69,] -58 -136
## [70,] 63 -36
## [71,] 36 37
## [72,] -151 -171
## [73,] -3 32
## [74,] -96 72
## [75,] -2 41
## [76,] 17 13
## [77,] -31 2
## [78,] -113 -119
## [79,] -160 -189
## [80,] -30 -78
## [81,] 43 -66
## [82,] -99 -111
## [83,] 50 48
## [84,] -77 -174
## [85,] -125 -128
## [86,] 19 11
## [87,] 9 44
## [88,] -18 53
## [89,] 46 -27
## [90,] -110 -144
## [91,] -53 -63
## [92,] 52 -153
## [93,] 47 -50
## [94,] 76 15
## [95,] 82 -108
## [96,] -86 -154
## [97,] 54 33
## [98,] 80 -165
## [99,] 29 55
## [100,] -121 -193
## [101,] -32 91
## [102,] -95 67
## [103,] 27 -133
## [104,] 22 42
## [105,] -14 -159
## [106,] 18 65
## [107,] 77 -64
## [108,] 59 -145
## [109,] 14 -11
## [110,] 61 -176
## [111,] 60 -73
## [112,] -6 69
## [113,] 73 58
## [114,] -152 -192
## [115,] -158 -190
## [116,] 107 30
## [117,] -97 -138
## [118,] 93 -107
## [119,] 75 71
## [120,] 109 89
## [121,] -71 84
## [122,] 88 20
## [123,] 49 104
## [124,] 45 85
## [125,] -15 95
## [126,] 68 39
## [127,] 98 -155
## [128,] 34 -183
## [129,] 99 -173
## [130,] 79 -191
## [131,] 96 -117
## [132,] -13 101
## [133,] 97 62
## [134,] 106 92
## [135,] 126 87
## [136,] 86 81
## [137,] 16 66
## [138,] -4 117
## [139,] -142 -143
## [140,] 83 102
## [141,] 74 129
## [142,] 40 115
## [143,] 64 118
## [144,] 137 103
## [145,] -22 -42
## [146,] 78 124
## [147,] -140 -195
## [148,] -135 -156
## [149,] 119 110
## [150,] 70 120
## [151,] 131 90
## [152,] 122 123
## [153,] 111 121
## [154,] -139 139
## [155,] 127 -34
## [156,] -51 -114
## [157,] 152 128
## [158,] 94 108
## [159,] 112 132
## [160,] 144 100
## [161,] 105 146
## [162,] 56 57
## [163,] 113 133
## [164,] 136 116
## [165,] 135 134
## [166,] 151 114
## [167,] 163 143
## [168,] -116 154
## [169,] 140 158
## [170,] 145 148
## [171,] 159 162
## [172,] 155 -93
## [173,] 161 142
## [174,] 168 147
## [175,] 150 125
## [176,] 170 156
## [177,] 149 160
## [178,] 171 175
## [179,] 138 141
## [180,] 164 165
## [181,] 172 153
## [182,] 173 157
## [183,] 179 166
## [184,] 169 180
## [185,] 176 -129
## [186,] 182 130
## [187,] 177 167
## [188,] 187 178
## [189,] 183 181
## [190,] 184 189
## [191,] 188 186
## [192,] 190 174
## [193,] 191 185
## [194,] 192 193
## Height:
## [1] 0.27115493 0.43650393 0.18618911 0.26078590 0.98366235 0.53746402
## [7] 0.34216914 2.06127778 0.16037613 0.39943025 0.13318755 0.49604525
## [13] 0.15141436 1.42348429 0.19443856 0.30230893 0.57867065 4.29812115
## [19] 0.16660562 0.44759821 0.12011662 0.88662617 0.23750115 0.24695130
## [25] 0.42368235 1.73748269 0.40015355 0.04876490 0.57277507 0.65677357
## [31] 0.20859315 3.29768671 0.18148683 0.06942249 0.34679784 0.72723566
## [37] 0.24229029 0.86867484 0.11798031 0.45178700 0.24822730 1.77535696
## [43] 0.16629460 0.54633808 0.05955850 0.08793436 0.33648227 0.86297180
## [49] 0.11939563 0.27335526 0.47494558 8.29034750 0.92023561 0.65834066
## [55] 3.00623481 0.38235026 0.35408498 1.00021047 0.20799756 0.11215188
## [61] 0.51736460 0.12224788 0.28770924 0.77431676 3.59571692 0.49814051
## [67] 0.77974254 1.23731174 0.46979301 1.84083549 0.63055368 6.78471583
## [73] 0.42299410 0.51031605 0.73593351 1.32545287 2.36573361 3.39591787
## [79] 0.30710111 0.59813341 1.26226460 0.69234288 0.43702739 11.99666356
## [85] 1.92207914 1.31660236 0.98072176 2.68902455 1.13135826 16.63403669
## [91] 0.39492880 0.24426451 0.67106094 0.22797679 0.35311671 0.23427476
## [97] 1.14686811 0.31371319 0.58302544 2.86886705 0.11039561 0.15684167
## [103] 0.89577291 0.33701281 0.22517858 1.08662995 0.19199995 0.53944792
## [109] 1.43964733 0.52230662 5.49700703 0.37870643 0.18443124 0.21818300
## [115] 0.62456896 0.30073959 1.66208056 0.28557313 0.50238746 0.22076823
## [121] 0.83442797 0.27275130 0.32814032 1.84873606 0.06193453 0.33640004
## [127] 1.06236096 0.26017205 0.48819129 0.66372275 6.51112796 0.60893069
## [133] 0.34736374 1.43006632 0.80466109 0.53676342 0.47069435 2.34010762
## [139] 0.28893466 1.55218727 0.20983587 0.29172167 2.93602081 0.33480348
## [145] 0.19055805 0.35303925 1.15574576 0.14410956 0.57970060 0.68469688
## [151] 0.04584235 0.25307057 0.46586160 2.72061429 0.72401079 0.42833651
## [157] 0.49730980 9.92101958 0.54248508 1.48885412 0.40117702 1.09468753
## [163] 0.25252878 0.72264031 0.43967025 2.37163395 0.24301994 1.05285399
## [169] 0.64109305 3.46103626 0.45419265 0.28313764 0.69538125 0.17342267
## [175] 1.25204215 0.26671148 0.71140741 0.18055375 0.53991526 0.24469325
## [181] 1.38379594 0.22225699 0.17882020 0.77333641 5.40162191 0.41593467
## [187] 0.77623187 13.30448578 1.08733535 2.19004838 1.14008120 2.82846980
## [193] 1.34457277 5.19152548
##
## Available components:
## [1] "order" "height" "ac" "merge" "diss" "call" "method" "data"
CLUSTER VIA DIANA
diana_res <- diana(scaled_data)
groups_diana <- cutree(diana_res, k = 3)
summary(diana_res)
## Merge:
## [,1] [,2]
## [1,] -12 -112
## [2,] -69 -70
## [3,] -47 -90
## [4,] -16 -149
## [5,] -26 -48
## [6,] 3 -87
## [7,] -21 -101
## [8,] -162 -164
## [9,] -91 -172
## [10,] -40 -89
## [11,] -163 -167
## [12,] -75 -185
## [13,] -10 -65
## [14,] -76 -84
## [15,] -33 -82
## [16,] 7 -37
## [17,] -23 5
## [18,] -46 -85
## [19,] -20 -57
## [20,] -100 -103
## [21,] -130 -134
## [22,] -177 -182
## [23,] -7 -194
## [24,] -106 -124
## [25,] -60 -148
## [26,] -168 -169
## [27,] -68 -80
## [28,] -105 -187
## [29,] -98 8
## [30,] -118 -122
## [31,] -25 20
## [32,] -59 -83
## [33,] -45 -188
## [34,] -67 -123
## [35,] -5 -92
## [36,] -132 -157
## [37,] -120 -179
## [38,] 32 -88
## [39,] -55 -180
## [40,] -115 -161
## [41,] 23 -184
## [42,] 9 -170
## [43,] -1 -146
## [44,] -44 -150
## [45,] 11 -166
## [46,] -19 -102
## [47,] -28 -49
## [48,] -104 -181
## [49,] -9 -17
## [50,] -24 -109
## [51,] 4 -74
## [52,] -62 -95
## [53,] -8 24
## [54,] -61 -147
## [55,] 27 -72
## [56,] 15 12
## [57,] -58 -136
## [58,] 44 -186
## [59,] -151 -171
## [60,] 33 -52
## [61,] 53 -36
## [62,] -31 2
## [63,] 6 -153
## [64,] 17 -29
## [65,] 1 -38
## [66,] -53 -56
## [67,] -73 -79
## [68,] -41 -50
## [69,] -3 22
## [70,] 18 -81
## [71,] -96 59
## [72,] 19 10
## [73,] 38 -66
## [74,] 36 -141
## [75,] -35 28
## [76,] 48 30
## [77,] -160 -189
## [78,] -30 -78
## [79,] 69 39
## [80,] 46 -126
## [81,] -77 -174
## [82,] -125 -128
## [83,] 29 45
## [84,] -108 -111
## [85,] 43 41
## [86,] -121 -178
## [87,] 35 37
## [88,] -99 -119
## [89,] -110 -144
## [90,] 13 55
## [91,] 62 26
## [92,] 51 75
## [93,] -86 -154
## [94,] 78 -165
## [95,] -32 -63
## [96,] 16 -131
## [97,] 63 42
## [98,] -18 80
## [99,] -15 84
## [100,] 31 21
## [101,] -2 54
## [102,] 25 -145
## [103,] 47 70
## [104,] 85 -137
## [105,] 61 -11
## [106,] -14 -127
## [107,] 52 -175
## [108,] -152 -192
## [109,] -158 -190
## [110,] 91 -64
## [111,] 65 -27
## [112,] -97 -138
## [113,] 40 82
## [114,] 101 -176
## [115,] -71 81
## [116,] 72 73
## [117,] 96 -133
## [118,] 56 97
## [119,] 94 -155
## [120,] 50 60
## [121,] 64 120
## [122,] 87 76
## [123,] 116 14
## [124,] 77 -191
## [125,] 102 71
## [126,] 83 -173
## [127,] 90 -43
## [128,] 122 86
## [129,] 100 74
## [130,] 105 111
## [131,] -13 -42
## [132,] 34 109
## [133,] 92 58
## [134,] -113 113
## [135,] 98 129
## [136,] 66 67
## [137,] 121 103
## [138,] 114 79
## [139,] 118 -39
## [140,] 104 107
## [141,] 130 -107
## [142,] -116 -143
## [143,] 106 117
## [144,] 135 -183
## [145,] 89 -117
## [146,] 93 126
## [147,] 110 136
## [148,] 123 139
## [149,] -140 -195
## [150,] -135 -156
## [151,] 127 95
## [152,] 145 108
## [153,] 133 68
## [154,] 143 -193
## [155,] 128 -54
## [156,] 99 88
## [157,] 134 -159
## [158,] -4 125
## [159,] -139 -142
## [160,] -51 -114
## [161,] -34 -93
## [162,] 140 148
## [163,] 119 146
## [164,] 153 137
## [165,] 141 57
## [166,] 131 115
## [167,] -6 156
## [168,] 147 -94
## [169,] 165 151
## [170,] 154 144
## [171,] 138 155
## [172,] 132 124
## [173,] 170 157
## [174,] 142 159
## [175,] -22 150
## [176,] 169 164
## [177,] 167 49
## [178,] 158 163
## [179,] 171 176
## [180,] 112 152
## [181,] 162 168
## [182,] 178 180
## [183,] 175 160
## [184,] 174 149
## [185,] 166 161
## [186,] 177 173
## [187,] 179 186
## [188,] 181 182
## [189,] 185 183
## [190,] 172 184
## [191,] 188 187
## [192,] 189 -129
## [193,] 191 190
## [194,] 193 192
## Order of objects:
## [1] 1 146 7 194 184 137 62 95 175 20 57 40 89 59 83 88 66 76
## [19] 84 33 82 75 185 47 90 87 153 91 172 170 39 31 69 70 168 169
## [37] 64 53 56 73 79 94 4 60 148 145 96 151 171 30 78 165 155 86
## [55] 154 98 162 164 163 167 166 173 97 138 110 144 117 152 192 2 61 147
## [73] 176 3 177 182 55 180 5 92 120 179 104 181 118 122 121 178 54 8
## [91] 106 124 36 11 12 112 38 27 107 58 136 10 65 68 80 72 43 32
## [109] 63 16 149 74 35 105 187 44 150 186 41 50 23 26 48 29 24 109
## [127] 45 188 52 28 49 46 85 81 6 15 108 111 99 119 9 17 14 127
## [145] 21 101 37 131 133 193 18 19 102 126 25 100 103 130 134 132 157 141
## [163] 183 113 115 161 125 128 159 67 123 158 190 160 189 191 116 143 139 142
## [181] 140 195 13 42 71 77 174 34 93 22 135 156 51 114 129
## Height:
## [1] 0.27115493 0.46332107 0.18618911 0.25628088 0.61766714 0.96086141
## [7] 0.31084406 0.62850483 1.43355781 0.16660562 0.39492313 0.12011662
## [13] 0.71630459 0.23750115 0.25091728 0.40592284 0.75916226 0.15141436
## [19] 1.10352121 0.16037613 0.33124994 0.13318755 0.73664229 0.05955850
## [25] 0.09574531 0.35976911 0.53004901 0.11939563 0.26145127 0.95758173
## [31] 2.45685253 0.35907905 0.04876490 0.48281923 0.20859315 0.64635336
## [37] 1.09222763 0.37390877 0.92855594 0.37493342 1.66240725 3.61787404
## [43] 1.26896261 0.19443856 0.58192407 0.76937220 0.39384459 0.35408498
## [49] 2.33404360 0.42299410 0.52301865 0.74092363 1.44985705 0.49814051
## [55] 1.08453556 0.22313741 0.11215188 0.45402057 0.12224788 0.28141817
## [61] 0.77734054 2.67879919 0.65834066 2.39123970 0.46979301 1.06272380
## [67] 1.16569915 0.63055368 4.91684343 0.56639120 0.31371319 0.67660594
## [73] 0.94694639 0.37610617 0.18443124 0.42621058 0.25234886 1.76022716
## [79] 0.24426451 0.46577398 0.25065905 0.75206469 0.28872568 0.41261698
## [85] 0.22517858 0.79664482 0.46397375 1.19145819 2.38961179 0.31121521
## [91] 0.19055805 0.35537269 0.62052067 0.82350852 0.04584235 0.36646196
## [97] 0.64999006 1.00908306 1.51362433 0.34736374 1.70740668 0.14410956
## [103] 0.48250979 0.20983587 0.32218423 0.79198502 1.14375062 0.52358139
## [109] 2.13676960 0.06193453 0.30267639 0.49622612 0.41196652 0.22076823
## [115] 0.86325130 0.27275130 0.35013101 1.16677022 0.37531121 1.51081118
## [121] 0.16538213 0.06942249 0.36037286 0.74643998 0.29593920 0.74643998
## [127] 0.24229029 0.35432516 0.94359689 0.28557313 0.60929048 0.16629460
## [133] 0.39188345 3.27385758 1.63054226 0.54091617 0.45825956 1.21515676
## [139] 0.46791919 2.13780778 0.28893466 3.27385758 0.62305015 1.05411637
## [145] 0.11039561 0.16114892 0.52418359 0.72027968 1.17874832 1.73869048
## [151] 0.53408194 0.28313764 0.43540485 0.87818021 0.22965528 0.17882020
## [157] 0.55820117 0.18055375 0.79799149 0.24469325 0.40773439 1.05782174
## [163] 2.11594862 0.87260600 0.25252878 0.66450055 0.43967025 1.22549295
## [169] 7.01122277 0.24301994 0.83717365 0.64109305 1.90826470 0.41593467
## [175] 0.76777398 4.67508744 1.02729123 2.12887338 1.29879692 2.92298277
## [181] 1.13135826 9.28598982 0.83473547 1.55991556 0.70562560 0.43702739
## [187] 3.18249379 1.42082015 4.08155900 2.12991500 1.14008120 2.70229309
## [193] 1.34457277 6.57294136
## Divisive coefficient:
## [1] 0.948823
##
## Available components:
## [1] "order" "height" "dc" "merge" "diss" "call" "data"
sil_agnes <- silhouette(groups_agnes, dist(scaled_data))
sil_diana <- silhouette(groups_diana, dist(scaled_data))
avg_sil_agnes <- mean(sil_agnes[, 3])
avg_sil_diana <- mean(sil_diana[, 3])
summary(sil_agnes)
## Silhouette of 195 units in 3 clusters from silhouette.default(x = groups_agnes, dist = dist(scaled_data)) :
## Cluster sizes and average silhouette widths:
## 90 98 7
## 0.2290844 0.3243515 0.3452700
## Individual silhouette widths:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.2447 0.1842 0.3248 0.2811 0.4090 0.5572
summary(sil_diana)
## Silhouette of 195 units in 3 clusters from silhouette.default(x = groups_diana, dist = dist(scaled_data)) :
## Cluster sizes and average silhouette widths:
## 169 13 13
## 0.4516393 0.2401505 0.3081906
## Individual silhouette widths:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.06912 0.32074 0.48538 0.42798 0.55728 0.63334
cat("Coeficiente de silueta promedio:\n")
## Coeficiente de silueta promedio:
cat(" - Aglomerativo:", avg_sil_agnes, "\n")
## - Aglomerativo: 0.281133
cat(" - Divisivo (DIANA):", avg_sil_diana, "\n")
## - Divisivo (DIANA): 0.4279768
fviz_cluster(list(data = scaled_data, cluster = groups_agnes),
main = "Clustering Aglomerativo (Ward)")
fviz_cluster(list(data = scaled_data, cluster = groups_diana),
main = "Clustering Divisivo (DIANA)")
if(avg_sil_diana > avg_sil_agnes) {
cat("\nConclusión: La técnica divisiva (DIANA) produce mejor agrupamiento")
} else {
cat("\nConclusión: La técnica aglomerativa produce mejor agrupamiento")
}
##
## Conclusión: La técnica divisiva (DIANA) produce mejor agrupamiento
cat("\n\nEvaluación de afirmaciones:")
##
##
## Evaluación de afirmaciones:
cat("\na) 'Mejor opción es jerárquica divisiva':",
ifelse(avg_sil_diana > avg_sil_agnes, "VERDADERO", "FALSO"))
##
## a) 'Mejor opción es jerárquica divisiva': VERDADERO
cat("\nb) 'Partición mejor que divisiva': No evaluada (requeriría K-means)")
##
## b) 'Partición mejor que divisiva': No evaluada (requeriría K-means)
cat("\nc) 'Aglomerativa mejor que divisiva':",
ifelse(avg_sil_agnes > avg_sil_diana, "VERDADERO", "FALSO"))
##
## c) 'Aglomerativa mejor que divisiva': FALSO
cat("\nd) 'Menos mal clasificados en aglomerativa':",
ifelse(avg_sil_agnes > avg_sil_diana, "POSIBLE", "IMPROBABLE"))
##
## d) 'Menos mal clasificados en aglomerativa': IMPROBABLE
# Cargar librerías necesarias
library(cluster)
library(factoextra)
library(dplyr)
library(ggplot2)
# Filtrar provincia de Lima y preparar datos
datos_filtrados <- data %>%
filter(!(departamento == "LIMA" & provincia == "LIMA")) %>%
mutate(
pct_agua_red = agua1_Red / agua10_Total,
razon_keiko_castillo = Keiko / Castillo,
tasa_fallecidos = (covidFallecidos / covidPositivos) * 1000
) %>%
filter(!is.na(tasa_fallecidos) & is.finite(tasa_fallecidos))
# Seleccionar variables y normalizar
cluster_data <- datos_filtrados %>%
select(pct_agua_red, razon_keiko_castillo, tasa_fallecidos)
scaled_data <- scale(cluster_data)
rownames(scaled_data) <- datos_filtrados$key
# Función para evaluar calidad de clustering
evaluar_clusters <- function(clusters, datos) {
sil <- silhouette(clusters, dist(datos))
return(list(
silueta_promedio = mean(sil[, 3]),
detalle_silueta = summary(sil)
))
}
# 1. Clustering Jerárquico Aglomerativo (Ward)
agnes_res <- agnes(scaled_data, method = "ward")
groups_agnes <- cutree(agnes_res, k = 3)
eval_agnes <- evaluar_clusters(groups_agnes, scaled_data)
# 2. Clustering Jerárquico Divisivo (DIANA)
diana_res <- diana(scaled_data)
groups_diana <- cutree(diana_res, k = 3)
eval_diana <- evaluar_clusters(groups_diana, scaled_data)
# 3. Comparar resultados
comparacion <- data.frame(
Tecnica = c("Jerárquico Aglomerativo", "Jerárquico Divisivo"),
Silueta_Promedio = c(eval_agnes$silueta_promedio, eval_diana$silueta_promedio),
Cluster_1 = c(sum(groups_agnes == 1), sum(groups_diana == 1)),
Cluster_2 = c(sum(groups_agnes == 2), sum(groups_diana == 2)),
Cluster_3 = c(sum(groups_agnes == 3), sum(groups_diana == 3))
)
# Visualizar comparación de coeficientes de silueta
ggplot(comparacion, aes(x = Tecnica, y = Silueta_Promedio, fill = Tecnica)) +
geom_col(width = 0.7) +
geom_text(aes(label = round(Silueta_Promedio, 3)), vjust = -0.5, size = 5) +
labs(title = "Comparación de Técnicas de Clustering",
subtitle = "Coeficiente de Silueta Promedio",
y = "Silueta Promedio", x = "") +
theme_minimal() +
theme(legend.position = "none",
text = element_text(size = 12),
axis.text = element_text(size = 12))
# Visualizar dendrogramas
fviz_dend(agnes_res, k = 3, main = "Clustering Aglomerativo (Método de Ward)")
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
## Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
fviz_dend(diana_res, k = 3, main = "Clustering Divisivo (DIANA)")
# Visualizar clusters
fviz_cluster(list(data = scaled_data, cluster = groups_agnes),
main = "Clusters Aglomerativos")
fviz_cluster(list(data = scaled_data, cluster = groups_diana),
main = "Clusters Divisivos")
# 4. Evaluación de afirmaciones
afirmaciones <- list(
a = eval_diana$silueta_promedio > eval_agnes$silueta_promedio,
c = eval_agnes$silueta_promedio > eval_diana$silueta_promedio,
d = FALSE # Inicializar como falso
)
# Calcular provincias "mal clasificadas" (discrepancias entre métodos)
discrepancias <- sum(groups_agnes != groups_diana)
total_provincias <- nrow(scaled_data)
# La afirmación d) sería verdadera si las discrepancias fueran menores en aglomerativo
# Pero dado que no tenemos una clasificación "verdadera", comparamos con el método con mejor silueta
afirmaciones$d <- discrepancias < total_provincias * 0.5 # Umbral arbitrario
# 5. Resultados finales
cat("=== RESULTADOS DEL ANÁLISIS ===\n\n")
## === RESULTADOS DEL ANÁLISIS ===
cat("Coeficientes de Silueta:\n")
## Coeficientes de Silueta:
cat(sprintf(" - Aglomerativo: %.4f\n", eval_agnes$silueta_promedio))
## - Aglomerativo: 0.2811
cat(sprintf(" - Divisivo (DIANA): %.4f\n\n", eval_diana$silueta_promedio))
## - Divisivo (DIANA): 0.4280
cat("Distribución de Clusters:\n")
## Distribución de Clusters:
print(comparacion[, c(1, 3:5)])
## Tecnica Cluster_1 Cluster_2 Cluster_3
## 1 Jerárquico Aglomerativo 90 98 7
## 2 Jerárquico Divisivo 169 13 13
cat(sprintf("\nProvincias con clasificación diferente entre métodos: %d/%d (%.1f%%)\n",
discrepancias, total_provincias, discrepancias/total_provincias*100))
##
## Provincias con clasificación diferente entre métodos: 115/195 (59.0%)
cat("\n=== EVALUACIÓN DE AFIRMACIONES ===\n")
##
## === EVALUACIÓN DE AFIRMACIONES ===
cat("a) 'Mejor opción es jerárquica divisiva':",
ifelse(afirmaciones$a, "VERDADERO", "FALSO"), "\n")
## a) 'Mejor opción es jerárquica divisiva': VERDADERO
cat("c) 'Aglomerativa mejor que divisiva':",
ifelse(afirmaciones$c, "VERDADERO", "FALSO"), "\n")
## c) 'Aglomerativa mejor que divisiva': FALSO
cat("d) 'Menos mal clasificados en aglomerativa':",
ifelse(afirmaciones$d, "VERDADERO", "FALSO"), "\n")
## d) 'Menos mal clasificados en aglomerativa': FALSO
cat("\nConclusión basada en coeficiente de silueta:\n")
##
## Conclusión basada en coeficiente de silueta:
if(eval_diana$silueta_promedio > eval_agnes$silueta_promedio) {
cat("-> La técnica divisiva produce clusters de mejor calidad")
} else {
cat("-> La técnica aglomerativa produce clusters de mejor calidad")
}
## -> La técnica divisiva produce clusters de mejor calidad
# Instalar paquetes si son necesarios
# install.packages("readxl")
# install.packages("dplyr")
# install.packages("purrr")
# Cargar librerías
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.1
library(dplyr)
library(purrr)
# Definir el nombre del archivo
archivo <- "CIA.xlsx"
# Obtener nombres de las hojas relevantes
hojas <- excel_sheets(archivo)[c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)]
# Leer y procesar cada hoja
datos <- map(hojas, function(hoja) {
# Leer datos
df <- read_excel(archivo, sheet = hoja)
# Renombrar columnas
nombres_columnas <- names(df)
if(length(nombres_columnas) >= 2) {
names(df) <- c("country", "valor")
}
# Limpiar y convertir valores
df <- df %>%
mutate(
valor = gsub("[$,%]", "", valor), # Eliminar $, %
valor = as.numeric(valor) # Convertir a numérico
) %>%
filter(!is.na(valor)) # Eliminar NA
# Renombrar columna de valor según la hoja
names(df)[2] <- case_when(
hoja == "Debt - external" ~ "debt_external_USD",
hoja == "Inflation rate (consumer prices" ~ "inflation_rate_pct",
hoja == "Youth unemployment rate (ages 1" ~ "youth_unemployment_pct",
hoja == "Public debt" ~ "public_debt_pct_GDP",
hoja == "Electricity - transmission_dist" ~ "electricity_losses_kWh",
hoja == "Electricity - consumption" ~ "electricity_consumption_kWh",
hoja == "Energy consumption per capita" ~ "energy_consumption_per_capita_Btu",
hoja == "Electricity - installed generat" ~ "electricity_capacity_kW",
hoja == "Carbon dioxide emissions" ~ "co2_emissions_metric_tons",
hoja == "Broadband - fixed subscriptions" ~ "broadband_subscriptions",
hoja == "Telephones - fixed lines" ~ "telephones_fixed_lines",
hoja == "Telephones - mobile cellular" ~ "telephones_mobile",
TRUE ~ names(df)[2]
)
return(df)
})
# Combinar todos los dataframes
datos_combinados <- reduce(datos, full_join, by = "country")
# Verificar estructura
str(datos_combinados)
## tibble [232 × 13] (S3: tbl_df/tbl/data.frame)
## $ country : chr [1:232] "China" "Mexico" "Indonesia" "India" ...
## $ debt_external_USD : num [1:232] 4.88e+11 3.06e+11 2.25e+11 2.13e+11 1.99e+11 ...
## $ inflation_rate_pct : num [1:232] 0.2 45842 45841 5 45751 ...
## $ youth_unemployment_pct : num [1:232] 45703 45782 45670 16 18 ...
## $ public_debt_pct_GDP : num [1:232] 47 45.3 45.3 46.5 83.7 ...
## $ electricity_losses_kWh : num [1:232] 3.25e+11 4.55e+10 2.75e+10 3.03e+11 1.07e+11 ...
## $ electricity_consumption_kWh : num [1:232] 8.89e+12 3.32e+11 3.56e+11 1.50e+12 6.08e+11 ...
## $ energy_consumption_per_capita_Btu: num [1:232] 1.14e+08 5.75e+07 3.74e+07 2.52e+07 4.89e+07 ...
## $ electricity_capacity_kW : num [1:232] 2.95e+09 1.06e+08 7.08e+07 4.99e+08 2.40e+08 ...
## $ co2_emissions_metric_tons : num [1:232] 1.22e+10 4.41e+08 8.30e+08 2.82e+09 4.38e+08 ...
## $ broadband_subscriptions : num [1:232] 6.36e+08 2.66e+07 1.35e+07 3.93e+07 4.84e+07 1.96e+07 3.59e+07 1.36e+07 8.91e+06 2.15e+06 ...
## $ telephones_fixed_lines : num [1:232] 1.73e+08 2.56e+07 9.16e+06 2.75e+07 2.56e+07 ...
## $ telephones_mobile : num [1:232] 1.81e+09 1.40e+08 3.52e+08 1.14e+09 2.13e+08 9.22e+07 2.45e+08 1.06e+08 8.74e+07 1.08e+08 ...
head(datos_combinados)
## # A tibble: 6 × 13
## country debt_external_USD inflation_rate_pct youth_unemployment_pct
## <chr> <dbl> <dbl> <dbl>
## 1 China 488114000000 0.2 45703
## 2 Mexico 306308000000 45842 45782
## 3 Indonesia 225273000000 45841 45670
## 4 India 212728000000 5 16
## 5 Brazil 198582000000 45751 18
## 6 Turkey (Turkiye) 149654000000 58.5 45823
## # ℹ 9 more variables: public_debt_pct_GDP <dbl>, electricity_losses_kWh <dbl>,
## # electricity_consumption_kWh <dbl>, energy_consumption_per_capita_Btu <dbl>,
## # electricity_capacity_kW <dbl>, co2_emissions_metric_tons <dbl>,
## # broadband_subscriptions <dbl>, telephones_fixed_lines <dbl>,
## # telephones_mobile <dbl>
# Opcional: guardar el resultado
# write.csv(datos_combinados, "datos_combinados_cia.csv", row.names = FALSE)
# Cargar paquetes necesarios
library(readxl)
library(dplyr)
library(tidyr)
library(purrr)
library(broom)
# Leer y combinar datos
archivo <- "CIA.xlsx"
hojas <- excel_sheets(archivo)[c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)]
datos <- map(hojas, function(hoja) {
df <- read_excel(archivo, sheet = hoja)
names(df) <- c("country", "valor")
# Limpiar valores
df <- df %>%
mutate(
valor = gsub("[$,%]", "", valor),
valor = as.numeric(valor)
) %>%
filter(!is.na(valor))
# Renombrar columna según contenido
names(df)[2] <- case_when(
hoja == "Debt - external" ~ "debt_external_USD",
hoja == "Inflation rate (consumer prices" ~ "inflation_rate_pct",
hoja == "Youth unemployment rate (ages 1" ~ "youth_unemployment_pct",
hoja == "Public debt" ~ "public_debt_pct_GDP",
hoja == "Electricity - transmission_dist" ~ "electricity_losses_kWh",
hoja == "Electricity - consumption" ~ "electricity_consumption_kWh",
hoja == "Energy consumption per capita" ~ "energy_consumption_per_capita_Btu",
hoja == "Electricity - installed generat" ~ "electricity_capacity_kW",
hoja == "Carbon dioxide emissions" ~ "co2_emissions_metric_tons",
hoja == "Broadband - fixed subscriptions" ~ "broadband_subscriptions",
hoja == "Telephones - fixed lines" ~ "telephones_fixed_lines",
hoja == "Telephones - mobile cellular" ~ "telephones_mobile",
TRUE ~ names(df)[2]
)
return(df)
})
# Combinar datos
combined_data <- reduce(datos, full_join, by = "country")
# Filtrar variables para regresión
reg_data <- combined_data %>%
select(
country,
# Variables dependientes (ECON)
inflation_rate_pct,
public_debt_pct_GDP,
youth_unemployment_pct,
# Variables independientes (COMMs y ENERGY)
broadband_subscriptions,
telephones_fixed_lines,
telephones_mobile,
electricity_losses_kWh,
electricity_consumption_kWh,
energy_consumption_per_capita_Btu,
electricity_capacity_kW,
co2_emissions_metric_tons
) %>%
na.omit()
# Realizar las tres regresiones
model1 <- lm(inflation_rate_pct ~
broadband_subscriptions +
telephones_fixed_lines +
telephones_mobile +
electricity_losses_kWh +
electricity_consumption_kWh +
energy_consumption_per_capita_Btu +
electricity_capacity_kW +
co2_emissions_metric_tons,
data = reg_data)
model2 <- lm(public_debt_pct_GDP ~
broadband_subscriptions +
telephones_fixed_lines +
telephones_mobile +
electricity_losses_kWh +
electricity_consumption_kWh +
energy_consumption_per_capita_Btu +
electricity_capacity_kW +
co2_emissions_metric_tons,
data = reg_data)
model3 <- lm(youth_unemployment_pct ~
broadband_subscriptions +
telephones_fixed_lines +
telephones_mobile +
electricity_losses_kWh +
electricity_consumption_kWh +
energy_consumption_per_capita_Btu +
electricity_capacity_kW +
co2_emissions_metric_tons,
data = reg_data)
# Extraer coeficientes relevantes
results <- bind_rows(
tidy(model1) %>% filter(term %in% c("telephones_fixed_lines", "co2_emissions_metric_tons")) %>% mutate(model = "Inflation"),
tidy(model2) %>% filter(term %in% c("telephones_fixed_lines", "co2_emissions_metric_tons")) %>% mutate(model = "Public Debt"),
tidy(model3) %>% filter(term %in% c("telephones_fixed_lines", "co2_emissions_metric_tons")) %>% mutate(model = "Youth Unemployment")
)
# Analizar resultados
effects <- results %>%
mutate(
direction = ifelse(estimate < 0, "Inverso", "Directo"),
significant = p.value < 0.05
) %>%
select(model, term, estimate, direction, significant)
# Mostrar resultados
print(effects)
## # A tibble: 6 × 5
## model term estimate direction significant
## <chr> <chr> <dbl> <chr> <lgl>
## 1 Inflation telephones_fixed_lines -0.000367 Inverso FALSE
## 2 Inflation co2_emissions_metric_tons -0.0000101 Inverso FALSE
## 3 Public Debt telephones_fixed_lines -0.000204 Inverso FALSE
## 4 Public Debt co2_emissions_metric_tons 0.0000113 Directo FALSE
## 5 Youth Unemployment telephones_fixed_lines 0.000514 Directo FALSE
## 6 Youth Unemployment co2_emissions_metric_tons 0.0000103 Directo FALSE
# Evaluar las afirmaciones
a_check <- effects %>%
filter(model == "Inflation") %>%
summarise(check = all(direction == "Inverso" & significant)) %>%
pull(check)
b_check <- effects %>%
filter(model == "Public Debt") %>%
summarise(check = all(direction == "Inverso" & significant)) %>%
pull(check)
c_check <- effects %>%
filter(model == "Youth Unemployment") %>%
summarise(check = all(direction == "Directo" & significant)) %>%
pull(check)
d_check <- all(!effects$significant)
e_check <- any(effects$significant)
cat("\nResultado de las afirmaciones:")
##
## Resultado de las afirmaciones:
cat("\na) Efecto inverso en inflación:", a_check)
##
## a) Efecto inverso en inflación: FALSE
cat("\nb) Efecto inverso en deuda pública:", b_check)
##
## b) Efecto inverso en deuda pública: FALSE
cat("\nc) Efecto directo en desempleo juvenil:", c_check)
##
## c) Efecto directo en desempleo juvenil: FALSE
cat("\nd) Sin efecto en ninguna:", d_check)
##
## d) Sin efecto en ninguna: TRUE
cat("\ne) Efecto en al menos una:", e_check)
##
## e) Efecto en al menos una: FALSE
# Determinar respuesta correcta
answer <- if(a_check && b_check && c_check) {
"a, b y c son verdaderas"
} else if(e_check) {
"e) fixlines y CO2 tienen efecto en al menos una regresión"
} else if(d_check) {
"d) fixlines y CO2 no tienen efecto en ninguna regresión"
} else {
"Algunas afirmaciones son verdaderas pero no todas"
}
cat("\n\nRespuesta final:", answer)
##
##
## Respuesta final: d) fixlines y CO2 no tienen efecto en ninguna regresión