examen final

library(rio)

## Warning: package 'rio' was built under R version 4.4.1

data = import("dataOK_all - dataOK_all.csv")

str(data)

## 'data.frame':    196 obs. of  50 variables:
##  $ V1                     : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ key                    : chr  "AMAZONAS+BAGUA" "AMAZONAS+BONGARA" "AMAZONAS+CHACHAPOYAS" "AMAZONAS+CONDORCANQUI" ...
##  $ Código                 : int  102 103 101 104 105 106 107 202 203 204 ...
##  $ pared1_Ladrillo        : int  4633 1602 3782 291 430 1546 4727 15 97 215 ...
##  $ pared2_Piedra          : int  46 9 22 7 7 7 35 1 0 3 ...
##  $ pared3_Adobe           : int  6639 2729 5881 672 5217 2778 17199 1763 658 368 ...
##  $ pared4_Tapia           : int  222 240 2476 8 6052 155 2964 70 3014 1701 ...
##  $ pared5_Quincha         : int  2518 157 309 386 346 720 1836 7 7 4 ...
##  $ pared6_Piedra          : int  127 36 168 7 54 28 518 65 7 6 ...
##  $ pared7_Madera          : int  4484 2505 1270 8145 606 3646 2714 2 3 4 ...
##  $ pared8_Triplay         : int  851 30 91 200 45 24 210 18 0 1 ...
##  $ pared9_Otro            : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ pared10_Total          : int  19520 7308 13999 9716 12757 8904 30203 1941 3786 2302 ...
##  $ techo1_Concreto        : int  2187 692 2262 56 187 480 2595 9 29 76 ...
##  $ techo2_Madera          : int  294 75 160 188 43 48 340 57 12 8 ...
##  $ techo3_Tejas           : int  179 382 3393 177 3071 2810 308 403 1146 1893 ...
##  $ techo4_Planchas        : int  13186 6084 8005 2036 9343 5495 26620 1297 2341 314 ...
##  $ techo5_Caña            : int  160 38 50 15 26 15 196 10 8 5 ...
##  $ techo6_Triplay         : int  106 5 14 10 12 5 62 17 4 3 ...
##  $ techo7_Paja            : int  3408 32 115 7234 75 51 82 148 246 3 ...
##  $ techo8_Otro            : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ techo9_Total           : int  19520 7308 13999 9716 12757 8904 30203 1941 3786 2302 ...
##  $ piso1_Parquet          : int  6 5 23 2 4 3 20 0 0 5 ...
##  $ piso2_Láminas          : int  19 2 36 0 0 4 32 0 0 1 ...
##  $ piso3_Losetas          : int  647 165 1077 20 46 264 940 4 16 41 ...
##  $ piso4_Madera           : int  157 132 240 1523 295 176 328 24 17 12 ...
##  $ piso5_Cemento          : int  7121 2917 6189 943 1911 2974 10631 195 314 409 ...
##  $ piso6_Tierra           : int  11569 4087 6434 7228 10501 5483 18252 1718 3439 1834 ...
##  $ piso7_Otro             : int  1 0 0 0 0 0 0 0 0 0 ...
##  $ piso8_Total            : int  19520 7308 13999 9716 12757 8904 30203 1941 3786 2302 ...
##  $ agua1_Red              : int  9429 4569 10647 1307 7172 5256 14712 1451 3229 1642 ...
##  $ agua2_Red_fueraVivienda: int  4392 1497 1619 867 3097 1278 8760 42 222 444 ...
##  $ agua3_Pilón            : int  793 215 184 1003 1112 154 1308 10 40 4 ...
##  $ agua4_Camión           : int  59 0 49 2 0 0 117 0 0 0 ...
##  $ agua5_Pozo             : int  1792 474 876 2564 819 1020 2502 230 190 124 ...
##  $ agua6_Manantial        : int  270 67 92 431 132 211 471 121 61 27 ...
##  $ agua7_Río              : int  2648 388 488 3428 369 948 2052 76 39 49 ...
##  $ agua8_Otro             : int  56 61 24 80 9 29 104 2 1 6 ...
##  $ agua9_Vecino           : int  81 37 20 34 47 8 177 9 4 6 ...
##  $ agua10_Total           : int  19520 7308 13999 9716 12757 8904 30203 1941 3786 2302 ...
##  $ elec1_Sí               : int  13204 6025 12248 1792 10886 6895 24395 1528 3089 2032 ...
##  $ elec2_No               : int  6316 1283 1751 7924 1871 2009 5808 413 697 270 ...
##  $ elec3_Total            : int  19520 7308 13999 9716 12757 8904 30203 1941 3786 2302 ...
##  $ departamento           : chr  "AMAZONAS" "AMAZONAS" "AMAZONAS" "AMAZONAS" ...
##  $ provincia              : chr  "BAGUA" "BONGARA" "CHACHAPOYAS" "CONDORCANQUI" ...
##  $ Castillo               : int  25629 8374 15671 13154 12606 7967 36540 2325 5056 2860 ...
##  $ Keiko                  : int  10770 5209 10473 1446 7840 5491 19222 1413 788 827 ...
##  $ ganaCastillo           : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ covidPositivos         : int  8126 389 2174 3481 456 110 3749 79 54 59 ...
##  $ covidFallecidos        : int  462 72 281 111 88 60 336 26 31 21 ...

library(cluster)
library(factoextra)

## Loading required package: ggplot2

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

datos_filtrados <- data %>%
  filter(!(departamento == "LIMA" & provincia == "LIMA"))


datos_filtrados <- datos_filtrados %>%
  mutate(
    pct_agua_red = agua1_Red / agua10_Total,
    razon_keiko_castillo = Keiko / Castillo,
    tasa_fallecidos = (covidFallecidos / covidPositivos) * 1000
  ) %>%
  filter(!is.na(tasa_fallecidos) & is.finite(tasa_fallecidos))

# Seleccionar variables para clustering
cluster_data <- datos_filtrados %>%
  select(pct_agua_red, razon_keiko_castillo, tasa_fallecidos)

# Normalizar los datos
scaled_data <- scale(cluster_data)
summary(scaled_data)

##   pct_agua_red     razon_keiko_castillo tasa_fallecidos  
##  Min.   :-2.8660   Min.   :-1.0445      Min.   :-1.3292  
##  1st Qu.:-0.5575   1st Qu.:-0.7744      1st Qu.:-0.5957  
##  Median : 0.1402   Median :-0.2902      Median :-0.1675  
##  Mean   : 0.0000   Mean   : 0.0000      Mean   : 0.0000  
##  3rd Qu.: 0.8012   3rd Qu.: 0.4558      3rd Qu.: 0.2724  
##  Max.   : 1.6271   Max.   : 3.8998      Max.   : 6.8136

CLUSTER POR JERARQUIA AGLOMERATIVA:

agnes_res <- agnes(scaled_data, method = "ward")
groups_agnes <- cutree(agnes_res, k = 3)  # Asumiendo 3 clusters
summary(agnes_res)

## Object of class 'agnes' from call:
##  agnes(x = scaled_data, method = "ward") 
## Agglomerative coefficient:  0.9749954 
## Order of objects:
##   [1]   1 146   7 194 184  95 137 175  33  82  75 185  76  84  60 148  62 145
##  [19]  20  57  40  89  59  83  88  66  31  69  70  64 168 169  23  26  48  24
##  [37]  45 188  29  39  43  52  46  85  47  90  87  81  91 172 170 153   4  97
##  [55] 138  96 151 171  98 162 164 163 167 166 173  86 154 117 110 144 152 192
##  [73]  30  78 165 155  34  93  56  79  73  71  77 174 116 139 142 143 140 195
##  [91]   2   5  92 120 178 179 181  61 147 176  21 101  37 104 118 122  54  94
## [109] 133 121 193   3 177 182 180  35  55  28  49 105 187  44 150 186  16 149
## [127]  74  38  41  50 107   6  58 136  13  32  53  63   9  17  68  80  72   8
## [145] 106 124  36  10  65  11  12 112 109  27  15  99 111 108  14 159 113 119
## [163] 115 161 125 128  67 123 158 190  18  19 102 126 131 127 141 130 134 132
## [181] 157  25 100 103 183 160 189 191  22  42 135 156  51 114 129
## Merge:
##        [,1] [,2]
##   [1,]  -12 -112
##   [2,]  -69  -70
##   [3,]  -47  -90
##   [4,]  -16 -149
##   [5,]  -26  -48
##   [6,]    3  -87
##   [7,]  -21 -101
##   [8,] -162 -164
##   [9,]  -29  -39
##  [10,]  -91 -172
##  [11,]  -40  -89
##  [12,] -163 -167
##  [13,]  -75 -185
##  [14,]  -10  -65
##  [15,]  -76  -84
##  [16,]    7  -37
##  [17,]  -33  -82
##  [18,]  -46  -85
##  [19,]  -20  -57
##  [20,] -126 -131
##  [21,] -100 -103
##  [22,] -130 -134
##  [23,]  -23    5
##  [24,] -177 -182
##  [25,]   -7 -194
##  [26,] -106 -124
##  [27,]  -54  -94
##  [28,]  -60 -148
##  [29,]  -98    8
##  [30,] -168 -169
##  [31,]  -68  -80
##  [32,]   24 -180
##  [33,] -105 -187
##  [34,]  -25   21
##  [35,] -118 -122
##  [36,] -120 -178
##  [37,] -179 -181
##  [38,]  -59  -83
##  [39,]  -45 -188
##  [40,]  -67 -123
##  [41,]   -5  -92
##  [42,] -132 -157
##  [43,]   38  -88
##  [44,]  -43  -52
##  [45,] -115 -161
##  [46,]    1 -109
##  [47,]  -38  -41
##  [48,]   25 -184
##  [49,] -127 -141
##  [50,]   -1 -146
##  [51,]  -44 -150
##  [52,]   10 -170
##  [53,]  -19 -102
##  [54,]  -28  -49
##  [55,]   12 -166
##  [56,]   -9  -17
##  [57,]   31  -72
##  [58,]  -35  -55
##  [59,]   28  -62
##  [60,]  -56  -79
##  [61,]  -61 -147
##  [62,]   51 -186
##  [63,]   -8   26
##  [64,]    4  -74
##  [65,]    6  -81
##  [66,] -104   35
##  [67,] -137 -175
##  [68,]   23  -24
##  [69,]  -58 -136
##  [70,]   63  -36
##  [71,]   36   37
##  [72,] -151 -171
##  [73,]   -3   32
##  [74,]  -96   72
##  [75,]   -2   41
##  [76,]   17   13
##  [77,]  -31    2
##  [78,] -113 -119
##  [79,] -160 -189
##  [80,]  -30  -78
##  [81,]   43  -66
##  [82,]  -99 -111
##  [83,]   50   48
##  [84,]  -77 -174
##  [85,] -125 -128
##  [86,]   19   11
##  [87,]    9   44
##  [88,]  -18   53
##  [89,]   46  -27
##  [90,] -110 -144
##  [91,]  -53  -63
##  [92,]   52 -153
##  [93,]   47  -50
##  [94,]   76   15
##  [95,]   82 -108
##  [96,]  -86 -154
##  [97,]   54   33
##  [98,]   80 -165
##  [99,]   29   55
## [100,] -121 -193
## [101,]  -32   91
## [102,]  -95   67
## [103,]   27 -133
## [104,]   22   42
## [105,]  -14 -159
## [106,]   18   65
## [107,]   77  -64
## [108,]   59 -145
## [109,]   14  -11
## [110,]   61 -176
## [111,]   60  -73
## [112,]   -6   69
## [113,]   73   58
## [114,] -152 -192
## [115,] -158 -190
## [116,]  107   30
## [117,]  -97 -138
## [118,]   93 -107
## [119,]   75   71
## [120,]  109   89
## [121,]  -71   84
## [122,]   88   20
## [123,]   49  104
## [124,]   45   85
## [125,]  -15   95
## [126,]   68   39
## [127,]   98 -155
## [128,]   34 -183
## [129,]   99 -173
## [130,]   79 -191
## [131,]   96 -117
## [132,]  -13  101
## [133,]   97   62
## [134,]  106   92
## [135,]  126   87
## [136,]   86   81
## [137,]   16   66
## [138,]   -4  117
## [139,] -142 -143
## [140,]   83  102
## [141,]   74  129
## [142,]   40  115
## [143,]   64  118
## [144,]  137  103
## [145,]  -22  -42
## [146,]   78  124
## [147,] -140 -195
## [148,] -135 -156
## [149,]  119  110
## [150,]   70  120
## [151,]  131   90
## [152,]  122  123
## [153,]  111  121
## [154,] -139  139
## [155,]  127  -34
## [156,]  -51 -114
## [157,]  152  128
## [158,]   94  108
## [159,]  112  132
## [160,]  144  100
## [161,]  105  146
## [162,]   56   57
## [163,]  113  133
## [164,]  136  116
## [165,]  135  134
## [166,]  151  114
## [167,]  163  143
## [168,] -116  154
## [169,]  140  158
## [170,]  145  148
## [171,]  159  162
## [172,]  155  -93
## [173,]  161  142
## [174,]  168  147
## [175,]  150  125
## [176,]  170  156
## [177,]  149  160
## [178,]  171  175
## [179,]  138  141
## [180,]  164  165
## [181,]  172  153
## [182,]  173  157
## [183,]  179  166
## [184,]  169  180
## [185,]  176 -129
## [186,]  182  130
## [187,]  177  167
## [188,]  187  178
## [189,]  183  181
## [190,]  184  189
## [191,]  188  186
## [192,]  190  174
## [193,]  191  185
## [194,]  192  193
## Height:
##   [1]  0.27115493  0.43650393  0.18618911  0.26078590  0.98366235  0.53746402
##   [7]  0.34216914  2.06127778  0.16037613  0.39943025  0.13318755  0.49604525
##  [13]  0.15141436  1.42348429  0.19443856  0.30230893  0.57867065  4.29812115
##  [19]  0.16660562  0.44759821  0.12011662  0.88662617  0.23750115  0.24695130
##  [25]  0.42368235  1.73748269  0.40015355  0.04876490  0.57277507  0.65677357
##  [31]  0.20859315  3.29768671  0.18148683  0.06942249  0.34679784  0.72723566
##  [37]  0.24229029  0.86867484  0.11798031  0.45178700  0.24822730  1.77535696
##  [43]  0.16629460  0.54633808  0.05955850  0.08793436  0.33648227  0.86297180
##  [49]  0.11939563  0.27335526  0.47494558  8.29034750  0.92023561  0.65834066
##  [55]  3.00623481  0.38235026  0.35408498  1.00021047  0.20799756  0.11215188
##  [61]  0.51736460  0.12224788  0.28770924  0.77431676  3.59571692  0.49814051
##  [67]  0.77974254  1.23731174  0.46979301  1.84083549  0.63055368  6.78471583
##  [73]  0.42299410  0.51031605  0.73593351  1.32545287  2.36573361  3.39591787
##  [79]  0.30710111  0.59813341  1.26226460  0.69234288  0.43702739 11.99666356
##  [85]  1.92207914  1.31660236  0.98072176  2.68902455  1.13135826 16.63403669
##  [91]  0.39492880  0.24426451  0.67106094  0.22797679  0.35311671  0.23427476
##  [97]  1.14686811  0.31371319  0.58302544  2.86886705  0.11039561  0.15684167
## [103]  0.89577291  0.33701281  0.22517858  1.08662995  0.19199995  0.53944792
## [109]  1.43964733  0.52230662  5.49700703  0.37870643  0.18443124  0.21818300
## [115]  0.62456896  0.30073959  1.66208056  0.28557313  0.50238746  0.22076823
## [121]  0.83442797  0.27275130  0.32814032  1.84873606  0.06193453  0.33640004
## [127]  1.06236096  0.26017205  0.48819129  0.66372275  6.51112796  0.60893069
## [133]  0.34736374  1.43006632  0.80466109  0.53676342  0.47069435  2.34010762
## [139]  0.28893466  1.55218727  0.20983587  0.29172167  2.93602081  0.33480348
## [145]  0.19055805  0.35303925  1.15574576  0.14410956  0.57970060  0.68469688
## [151]  0.04584235  0.25307057  0.46586160  2.72061429  0.72401079  0.42833651
## [157]  0.49730980  9.92101958  0.54248508  1.48885412  0.40117702  1.09468753
## [163]  0.25252878  0.72264031  0.43967025  2.37163395  0.24301994  1.05285399
## [169]  0.64109305  3.46103626  0.45419265  0.28313764  0.69538125  0.17342267
## [175]  1.25204215  0.26671148  0.71140741  0.18055375  0.53991526  0.24469325
## [181]  1.38379594  0.22225699  0.17882020  0.77333641  5.40162191  0.41593467
## [187]  0.77623187 13.30448578  1.08733535  2.19004838  1.14008120  2.82846980
## [193]  1.34457277  5.19152548
## 
## Available components:
## [1] "order"  "height" "ac"     "merge"  "diss"   "call"   "method" "data"

CLUSTER VIA DIANA

diana_res <- diana(scaled_data)
groups_diana <- cutree(diana_res, k = 3)
summary(diana_res)

## Merge:
##        [,1] [,2]
##   [1,]  -12 -112
##   [2,]  -69  -70
##   [3,]  -47  -90
##   [4,]  -16 -149
##   [5,]  -26  -48
##   [6,]    3  -87
##   [7,]  -21 -101
##   [8,] -162 -164
##   [9,]  -91 -172
##  [10,]  -40  -89
##  [11,] -163 -167
##  [12,]  -75 -185
##  [13,]  -10  -65
##  [14,]  -76  -84
##  [15,]  -33  -82
##  [16,]    7  -37
##  [17,]  -23    5
##  [18,]  -46  -85
##  [19,]  -20  -57
##  [20,] -100 -103
##  [21,] -130 -134
##  [22,] -177 -182
##  [23,]   -7 -194
##  [24,] -106 -124
##  [25,]  -60 -148
##  [26,] -168 -169
##  [27,]  -68  -80
##  [28,] -105 -187
##  [29,]  -98    8
##  [30,] -118 -122
##  [31,]  -25   20
##  [32,]  -59  -83
##  [33,]  -45 -188
##  [34,]  -67 -123
##  [35,]   -5  -92
##  [36,] -132 -157
##  [37,] -120 -179
##  [38,]   32  -88
##  [39,]  -55 -180
##  [40,] -115 -161
##  [41,]   23 -184
##  [42,]    9 -170
##  [43,]   -1 -146
##  [44,]  -44 -150
##  [45,]   11 -166
##  [46,]  -19 -102
##  [47,]  -28  -49
##  [48,] -104 -181
##  [49,]   -9  -17
##  [50,]  -24 -109
##  [51,]    4  -74
##  [52,]  -62  -95
##  [53,]   -8   24
##  [54,]  -61 -147
##  [55,]   27  -72
##  [56,]   15   12
##  [57,]  -58 -136
##  [58,]   44 -186
##  [59,] -151 -171
##  [60,]   33  -52
##  [61,]   53  -36
##  [62,]  -31    2
##  [63,]    6 -153
##  [64,]   17  -29
##  [65,]    1  -38
##  [66,]  -53  -56
##  [67,]  -73  -79
##  [68,]  -41  -50
##  [69,]   -3   22
##  [70,]   18  -81
##  [71,]  -96   59
##  [72,]   19   10
##  [73,]   38  -66
##  [74,]   36 -141
##  [75,]  -35   28
##  [76,]   48   30
##  [77,] -160 -189
##  [78,]  -30  -78
##  [79,]   69   39
##  [80,]   46 -126
##  [81,]  -77 -174
##  [82,] -125 -128
##  [83,]   29   45
##  [84,] -108 -111
##  [85,]   43   41
##  [86,] -121 -178
##  [87,]   35   37
##  [88,]  -99 -119
##  [89,] -110 -144
##  [90,]   13   55
##  [91,]   62   26
##  [92,]   51   75
##  [93,]  -86 -154
##  [94,]   78 -165
##  [95,]  -32  -63
##  [96,]   16 -131
##  [97,]   63   42
##  [98,]  -18   80
##  [99,]  -15   84
## [100,]   31   21
## [101,]   -2   54
## [102,]   25 -145
## [103,]   47   70
## [104,]   85 -137
## [105,]   61  -11
## [106,]  -14 -127
## [107,]   52 -175
## [108,] -152 -192
## [109,] -158 -190
## [110,]   91  -64
## [111,]   65  -27
## [112,]  -97 -138
## [113,]   40   82
## [114,]  101 -176
## [115,]  -71   81
## [116,]   72   73
## [117,]   96 -133
## [118,]   56   97
## [119,]   94 -155
## [120,]   50   60
## [121,]   64  120
## [122,]   87   76
## [123,]  116   14
## [124,]   77 -191
## [125,]  102   71
## [126,]   83 -173
## [127,]   90  -43
## [128,]  122   86
## [129,]  100   74
## [130,]  105  111
## [131,]  -13  -42
## [132,]   34  109
## [133,]   92   58
## [134,] -113  113
## [135,]   98  129
## [136,]   66   67
## [137,]  121  103
## [138,]  114   79
## [139,]  118  -39
## [140,]  104  107
## [141,]  130 -107
## [142,] -116 -143
## [143,]  106  117
## [144,]  135 -183
## [145,]   89 -117
## [146,]   93  126
## [147,]  110  136
## [148,]  123  139
## [149,] -140 -195
## [150,] -135 -156
## [151,]  127   95
## [152,]  145  108
## [153,]  133   68
## [154,]  143 -193
## [155,]  128  -54
## [156,]   99   88
## [157,]  134 -159
## [158,]   -4  125
## [159,] -139 -142
## [160,]  -51 -114
## [161,]  -34  -93
## [162,]  140  148
## [163,]  119  146
## [164,]  153  137
## [165,]  141   57
## [166,]  131  115
## [167,]   -6  156
## [168,]  147  -94
## [169,]  165  151
## [170,]  154  144
## [171,]  138  155
## [172,]  132  124
## [173,]  170  157
## [174,]  142  159
## [175,]  -22  150
## [176,]  169  164
## [177,]  167   49
## [178,]  158  163
## [179,]  171  176
## [180,]  112  152
## [181,]  162  168
## [182,]  178  180
## [183,]  175  160
## [184,]  174  149
## [185,]  166  161
## [186,]  177  173
## [187,]  179  186
## [188,]  181  182
## [189,]  185  183
## [190,]  172  184
## [191,]  188  187
## [192,]  189 -129
## [193,]  191  190
## [194,]  193  192
## Order of objects:
##   [1]   1 146   7 194 184 137  62  95 175  20  57  40  89  59  83  88  66  76
##  [19]  84  33  82  75 185  47  90  87 153  91 172 170  39  31  69  70 168 169
##  [37]  64  53  56  73  79  94   4  60 148 145  96 151 171  30  78 165 155  86
##  [55] 154  98 162 164 163 167 166 173  97 138 110 144 117 152 192   2  61 147
##  [73] 176   3 177 182  55 180   5  92 120 179 104 181 118 122 121 178  54   8
##  [91] 106 124  36  11  12 112  38  27 107  58 136  10  65  68  80  72  43  32
## [109]  63  16 149  74  35 105 187  44 150 186  41  50  23  26  48  29  24 109
## [127]  45 188  52  28  49  46  85  81   6  15 108 111  99 119   9  17  14 127
## [145]  21 101  37 131 133 193  18  19 102 126  25 100 103 130 134 132 157 141
## [163] 183 113 115 161 125 128 159  67 123 158 190 160 189 191 116 143 139 142
## [181] 140 195  13  42  71  77 174  34  93  22 135 156  51 114 129
## Height:
##   [1] 0.27115493 0.46332107 0.18618911 0.25628088 0.61766714 0.96086141
##   [7] 0.31084406 0.62850483 1.43355781 0.16660562 0.39492313 0.12011662
##  [13] 0.71630459 0.23750115 0.25091728 0.40592284 0.75916226 0.15141436
##  [19] 1.10352121 0.16037613 0.33124994 0.13318755 0.73664229 0.05955850
##  [25] 0.09574531 0.35976911 0.53004901 0.11939563 0.26145127 0.95758173
##  [31] 2.45685253 0.35907905 0.04876490 0.48281923 0.20859315 0.64635336
##  [37] 1.09222763 0.37390877 0.92855594 0.37493342 1.66240725 3.61787404
##  [43] 1.26896261 0.19443856 0.58192407 0.76937220 0.39384459 0.35408498
##  [49] 2.33404360 0.42299410 0.52301865 0.74092363 1.44985705 0.49814051
##  [55] 1.08453556 0.22313741 0.11215188 0.45402057 0.12224788 0.28141817
##  [61] 0.77734054 2.67879919 0.65834066 2.39123970 0.46979301 1.06272380
##  [67] 1.16569915 0.63055368 4.91684343 0.56639120 0.31371319 0.67660594
##  [73] 0.94694639 0.37610617 0.18443124 0.42621058 0.25234886 1.76022716
##  [79] 0.24426451 0.46577398 0.25065905 0.75206469 0.28872568 0.41261698
##  [85] 0.22517858 0.79664482 0.46397375 1.19145819 2.38961179 0.31121521
##  [91] 0.19055805 0.35537269 0.62052067 0.82350852 0.04584235 0.36646196
##  [97] 0.64999006 1.00908306 1.51362433 0.34736374 1.70740668 0.14410956
## [103] 0.48250979 0.20983587 0.32218423 0.79198502 1.14375062 0.52358139
## [109] 2.13676960 0.06193453 0.30267639 0.49622612 0.41196652 0.22076823
## [115] 0.86325130 0.27275130 0.35013101 1.16677022 0.37531121 1.51081118
## [121] 0.16538213 0.06942249 0.36037286 0.74643998 0.29593920 0.74643998
## [127] 0.24229029 0.35432516 0.94359689 0.28557313 0.60929048 0.16629460
## [133] 0.39188345 3.27385758 1.63054226 0.54091617 0.45825956 1.21515676
## [139] 0.46791919 2.13780778 0.28893466 3.27385758 0.62305015 1.05411637
## [145] 0.11039561 0.16114892 0.52418359 0.72027968 1.17874832 1.73869048
## [151] 0.53408194 0.28313764 0.43540485 0.87818021 0.22965528 0.17882020
## [157] 0.55820117 0.18055375 0.79799149 0.24469325 0.40773439 1.05782174
## [163] 2.11594862 0.87260600 0.25252878 0.66450055 0.43967025 1.22549295
## [169] 7.01122277 0.24301994 0.83717365 0.64109305 1.90826470 0.41593467
## [175] 0.76777398 4.67508744 1.02729123 2.12887338 1.29879692 2.92298277
## [181] 1.13135826 9.28598982 0.83473547 1.55991556 0.70562560 0.43702739
## [187] 3.18249379 1.42082015 4.08155900 2.12991500 1.14008120 2.70229309
## [193] 1.34457277 6.57294136
## Divisive coefficient:
## [1] 0.948823
## 
## Available components:
## [1] "order"  "height" "dc"     "merge"  "diss"   "call"   "data"

sil_agnes <- silhouette(groups_agnes, dist(scaled_data))
sil_diana <- silhouette(groups_diana, dist(scaled_data))


avg_sil_agnes <- mean(sil_agnes[, 3])
avg_sil_diana <- mean(sil_diana[, 3])

summary(sil_agnes)

## Silhouette of 195 units in 3 clusters from silhouette.default(x = groups_agnes, dist = dist(scaled_data)) :
##  Cluster sizes and average silhouette widths:
##        90        98         7 
## 0.2290844 0.3243515 0.3452700 
## Individual silhouette widths:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -0.2447  0.1842  0.3248  0.2811  0.4090  0.5572

summary(sil_diana)

## Silhouette of 195 units in 3 clusters from silhouette.default(x = groups_diana, dist = dist(scaled_data)) :
##  Cluster sizes and average silhouette widths:
##       169        13        13 
## 0.4516393 0.2401505 0.3081906 
## Individual silhouette widths:
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.06912  0.32074  0.48538  0.42798  0.55728  0.63334

cat("Coeficiente de silueta promedio:\n")

## Coeficiente de silueta promedio:

cat(" - Aglomerativo:", avg_sil_agnes, "\n")

##  - Aglomerativo: 0.281133

cat(" - Divisivo (DIANA):", avg_sil_diana, "\n")

##  - Divisivo (DIANA): 0.4279768

fviz_cluster(list(data = scaled_data, cluster = groups_agnes), 
             main = "Clustering Aglomerativo (Ward)")

fviz_cluster(list(data = scaled_data, cluster = groups_diana), 
             main = "Clustering Divisivo (DIANA)")

if(avg_sil_diana > avg_sil_agnes) {
  cat("\nConclusión: La técnica divisiva (DIANA) produce mejor agrupamiento")
} else {
  cat("\nConclusión: La técnica aglomerativa produce mejor agrupamiento")
}

## 
## Conclusión: La técnica divisiva (DIANA) produce mejor agrupamiento

cat("\n\nEvaluación de afirmaciones:")

## 
## 
## Evaluación de afirmaciones:

cat("\na) 'Mejor opción es jerárquica divisiva':", 
    ifelse(avg_sil_diana > avg_sil_agnes, "VERDADERO", "FALSO"))

## 
## a) 'Mejor opción es jerárquica divisiva': VERDADERO

cat("\nb) 'Partición mejor que divisiva': No evaluada (requeriría K-means)")

## 
## b) 'Partición mejor que divisiva': No evaluada (requeriría K-means)

cat("\nc) 'Aglomerativa mejor que divisiva':", 
    ifelse(avg_sil_agnes > avg_sil_diana, "VERDADERO", "FALSO"))

## 
## c) 'Aglomerativa mejor que divisiva': FALSO

cat("\nd) 'Menos mal clasificados en aglomerativa':",
    ifelse(avg_sil_agnes > avg_sil_diana, "POSIBLE", "IMPROBABLE"))

## 
## d) 'Menos mal clasificados en aglomerativa': IMPROBABLE

# Cargar librerías necesarias
library(cluster)
library(factoextra)
library(dplyr)
library(ggplot2)


# Filtrar provincia de Lima y preparar datos
datos_filtrados <- data %>%
  filter(!(departamento == "LIMA" & provincia == "LIMA")) %>%
  mutate(
    pct_agua_red = agua1_Red / agua10_Total,
    razon_keiko_castillo = Keiko / Castillo,
    tasa_fallecidos = (covidFallecidos / covidPositivos) * 1000
  ) %>%
  filter(!is.na(tasa_fallecidos) & is.finite(tasa_fallecidos))

# Seleccionar variables y normalizar
cluster_data <- datos_filtrados %>%
  select(pct_agua_red, razon_keiko_castillo, tasa_fallecidos)

scaled_data <- scale(cluster_data)
rownames(scaled_data) <- datos_filtrados$key

# Función para evaluar calidad de clustering
evaluar_clusters <- function(clusters, datos) {
  sil <- silhouette(clusters, dist(datos))
  return(list(
    silueta_promedio = mean(sil[, 3]),
    detalle_silueta = summary(sil)
  ))
}

# 1. Clustering Jerárquico Aglomerativo (Ward)
agnes_res <- agnes(scaled_data, method = "ward")
groups_agnes <- cutree(agnes_res, k = 3)
eval_agnes <- evaluar_clusters(groups_agnes, scaled_data)

# 2. Clustering Jerárquico Divisivo (DIANA)
diana_res <- diana(scaled_data)
groups_diana <- cutree(diana_res, k = 3)
eval_diana <- evaluar_clusters(groups_diana, scaled_data)

# 3. Comparar resultados
comparacion <- data.frame(
  Tecnica = c("Jerárquico Aglomerativo", "Jerárquico Divisivo"),
  Silueta_Promedio = c(eval_agnes$silueta_promedio, eval_diana$silueta_promedio),
  Cluster_1 = c(sum(groups_agnes == 1), sum(groups_diana == 1)),
  Cluster_2 = c(sum(groups_agnes == 2), sum(groups_diana == 2)),
  Cluster_3 = c(sum(groups_agnes == 3), sum(groups_diana == 3))
)

# Visualizar comparación de coeficientes de silueta
ggplot(comparacion, aes(x = Tecnica, y = Silueta_Promedio, fill = Tecnica)) +
  geom_col(width = 0.7) +
  geom_text(aes(label = round(Silueta_Promedio, 3)), vjust = -0.5, size = 5) +
  labs(title = "Comparación de Técnicas de Clustering",
       subtitle = "Coeficiente de Silueta Promedio",
       y = "Silueta Promedio", x = "") +
  theme_minimal() +
  theme(legend.position = "none",
        text = element_text(size = 12),
        axis.text = element_text(size = 12))

# Visualizar dendrogramas
fviz_dend(agnes_res, k = 3, main = "Clustering Aglomerativo (Método de Ward)")

## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
##   Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

fviz_dend(diana_res, k = 3, main = "Clustering Divisivo (DIANA)")

# Visualizar clusters
fviz_cluster(list(data = scaled_data, cluster = groups_agnes),
             main = "Clusters Aglomerativos")

fviz_cluster(list(data = scaled_data, cluster = groups_diana),
             main = "Clusters Divisivos")

# 4. Evaluación de afirmaciones
afirmaciones <- list(
  a = eval_diana$silueta_promedio > eval_agnes$silueta_promedio,
  c = eval_agnes$silueta_promedio > eval_diana$silueta_promedio,
  d = FALSE  # Inicializar como falso
)

# Calcular provincias "mal clasificadas" (discrepancias entre métodos)
discrepancias <- sum(groups_agnes != groups_diana)
total_provincias <- nrow(scaled_data)

# La afirmación d) sería verdadera si las discrepancias fueran menores en aglomerativo
# Pero dado que no tenemos una clasificación "verdadera", comparamos con el método con mejor silueta
afirmaciones$d <- discrepancias < total_provincias * 0.5  # Umbral arbitrario

# 5. Resultados finales
cat("=== RESULTADOS DEL ANÁLISIS ===\n\n")

## === RESULTADOS DEL ANÁLISIS ===

cat("Coeficientes de Silueta:\n")

## Coeficientes de Silueta:

cat(sprintf(" - Aglomerativo: %.4f\n", eval_agnes$silueta_promedio))

##  - Aglomerativo: 0.2811

cat(sprintf(" - Divisivo (DIANA): %.4f\n\n", eval_diana$silueta_promedio))

##  - Divisivo (DIANA): 0.4280

cat("Distribución de Clusters:\n")

## Distribución de Clusters:

print(comparacion[, c(1, 3:5)])

##                   Tecnica Cluster_1 Cluster_2 Cluster_3
## 1 Jerárquico Aglomerativo        90        98         7
## 2     Jerárquico Divisivo       169        13        13

cat(sprintf("\nProvincias con clasificación diferente entre métodos: %d/%d (%.1f%%)\n",
            discrepancias, total_provincias, discrepancias/total_provincias*100))

## 
## Provincias con clasificación diferente entre métodos: 115/195 (59.0%)

cat("\n=== EVALUACIÓN DE AFIRMACIONES ===\n")

## 
## === EVALUACIÓN DE AFIRMACIONES ===

cat("a) 'Mejor opción es jerárquica divisiva':", 
    ifelse(afirmaciones$a, "VERDADERO", "FALSO"), "\n")

## a) 'Mejor opción es jerárquica divisiva': VERDADERO

cat("c) 'Aglomerativa mejor que divisiva':", 
    ifelse(afirmaciones$c, "VERDADERO", "FALSO"), "\n")

## c) 'Aglomerativa mejor que divisiva': FALSO

cat("d) 'Menos mal clasificados en aglomerativa':", 
    ifelse(afirmaciones$d, "VERDADERO", "FALSO"), "\n")

## d) 'Menos mal clasificados en aglomerativa': FALSO

cat("\nConclusión basada en coeficiente de silueta:\n")

## 
## Conclusión basada en coeficiente de silueta:

if(eval_diana$silueta_promedio > eval_agnes$silueta_promedio) {
  cat("-> La técnica divisiva produce clusters de mejor calidad")
} else {
  cat("-> La técnica aglomerativa produce clusters de mejor calidad")
}

## -> La técnica divisiva produce clusters de mejor calidad

# Instalar paquetes si son necesarios
# install.packages("readxl")
# install.packages("dplyr")
# install.packages("purrr")

# Cargar librerías
library(readxl)

## Warning: package 'readxl' was built under R version 4.4.1

library(dplyr)
library(purrr)

# Definir el nombre del archivo
archivo <- "CIA.xlsx"

# Obtener nombres de las hojas relevantes
hojas <- excel_sheets(archivo)[c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)]

# Leer y procesar cada hoja
datos <- map(hojas, function(hoja) {
  # Leer datos
  df <- read_excel(archivo, sheet = hoja)
  
  # Renombrar columnas
  nombres_columnas <- names(df)
  if(length(nombres_columnas) >= 2) {
    names(df) <- c("country", "valor")
  }
  
  # Limpiar y convertir valores
  df <- df %>%
    mutate(
      valor = gsub("[$,%]", "", valor),  # Eliminar $, %
      valor = as.numeric(valor)           # Convertir a numérico
    ) %>%
    filter(!is.na(valor))                 # Eliminar NA
  
  # Renombrar columna de valor según la hoja
  names(df)[2] <- case_when(
    hoja == "Debt - external" ~ "debt_external_USD",
    hoja == "Inflation rate (consumer prices" ~ "inflation_rate_pct",
    hoja == "Youth unemployment rate (ages 1" ~ "youth_unemployment_pct",
    hoja == "Public debt" ~ "public_debt_pct_GDP",
    hoja == "Electricity - transmission_dist" ~ "electricity_losses_kWh",
    hoja == "Electricity - consumption" ~ "electricity_consumption_kWh",
    hoja == "Energy consumption per capita" ~ "energy_consumption_per_capita_Btu",
    hoja == "Electricity - installed generat" ~ "electricity_capacity_kW",
    hoja == "Carbon dioxide emissions" ~ "co2_emissions_metric_tons",
    hoja == "Broadband - fixed subscriptions" ~ "broadband_subscriptions",
    hoja == "Telephones - fixed lines" ~ "telephones_fixed_lines",
    hoja == "Telephones - mobile cellular" ~ "telephones_mobile",
    TRUE ~ names(df)[2]
  )
  
  return(df)
})

# Combinar todos los dataframes
datos_combinados <- reduce(datos, full_join, by = "country")

# Verificar estructura
str(datos_combinados)

## tibble [232 × 13] (S3: tbl_df/tbl/data.frame)
##  $ country                          : chr [1:232] "China" "Mexico" "Indonesia" "India" ...
##  $ debt_external_USD                : num [1:232] 4.88e+11 3.06e+11 2.25e+11 2.13e+11 1.99e+11 ...
##  $ inflation_rate_pct               : num [1:232] 0.2 45842 45841 5 45751 ...
##  $ youth_unemployment_pct           : num [1:232] 45703 45782 45670 16 18 ...
##  $ public_debt_pct_GDP              : num [1:232] 47 45.3 45.3 46.5 83.7 ...
##  $ electricity_losses_kWh           : num [1:232] 3.25e+11 4.55e+10 2.75e+10 3.03e+11 1.07e+11 ...
##  $ electricity_consumption_kWh      : num [1:232] 8.89e+12 3.32e+11 3.56e+11 1.50e+12 6.08e+11 ...
##  $ energy_consumption_per_capita_Btu: num [1:232] 1.14e+08 5.75e+07 3.74e+07 2.52e+07 4.89e+07 ...
##  $ electricity_capacity_kW          : num [1:232] 2.95e+09 1.06e+08 7.08e+07 4.99e+08 2.40e+08 ...
##  $ co2_emissions_metric_tons        : num [1:232] 1.22e+10 4.41e+08 8.30e+08 2.82e+09 4.38e+08 ...
##  $ broadband_subscriptions          : num [1:232] 6.36e+08 2.66e+07 1.35e+07 3.93e+07 4.84e+07 1.96e+07 3.59e+07 1.36e+07 8.91e+06 2.15e+06 ...
##  $ telephones_fixed_lines           : num [1:232] 1.73e+08 2.56e+07 9.16e+06 2.75e+07 2.56e+07 ...
##  $ telephones_mobile                : num [1:232] 1.81e+09 1.40e+08 3.52e+08 1.14e+09 2.13e+08 9.22e+07 2.45e+08 1.06e+08 8.74e+07 1.08e+08 ...

head(datos_combinados)

## # A tibble: 6 × 13
##   country          debt_external_USD inflation_rate_pct youth_unemployment_pct
##   <chr>                        <dbl>              <dbl>                  <dbl>
## 1 China                 488114000000                0.2                  45703
## 2 Mexico                306308000000            45842                    45782
## 3 Indonesia             225273000000            45841                    45670
## 4 India                 212728000000                5                       16
## 5 Brazil                198582000000            45751                       18
## 6 Turkey (Turkiye)      149654000000               58.5                  45823
## # ℹ 9 more variables: public_debt_pct_GDP <dbl>, electricity_losses_kWh <dbl>,
## #   electricity_consumption_kWh <dbl>, energy_consumption_per_capita_Btu <dbl>,
## #   electricity_capacity_kW <dbl>, co2_emissions_metric_tons <dbl>,
## #   broadband_subscriptions <dbl>, telephones_fixed_lines <dbl>,
## #   telephones_mobile <dbl>

# Opcional: guardar el resultado
# write.csv(datos_combinados, "datos_combinados_cia.csv", row.names = FALSE)

# Cargar paquetes necesarios
library(readxl)
library(dplyr)
library(tidyr)
library(purrr)
library(broom)

# Leer y combinar datos
archivo <- "CIA.xlsx"
hojas <- excel_sheets(archivo)[c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)]

datos <- map(hojas, function(hoja) {
  df <- read_excel(archivo, sheet = hoja)
  names(df) <- c("country", "valor")
  
  # Limpiar valores
  df <- df %>%
    mutate(
      valor = gsub("[$,%]", "", valor),
      valor = as.numeric(valor)
    ) %>%
    filter(!is.na(valor))
  
  # Renombrar columna según contenido
  names(df)[2] <- case_when(
    hoja == "Debt - external" ~ "debt_external_USD",
    hoja == "Inflation rate (consumer prices" ~ "inflation_rate_pct",
    hoja == "Youth unemployment rate (ages 1" ~ "youth_unemployment_pct",
    hoja == "Public debt" ~ "public_debt_pct_GDP",
    hoja == "Electricity - transmission_dist" ~ "electricity_losses_kWh",
    hoja == "Electricity - consumption" ~ "electricity_consumption_kWh",
    hoja == "Energy consumption per capita" ~ "energy_consumption_per_capita_Btu",
    hoja == "Electricity - installed generat" ~ "electricity_capacity_kW",
    hoja == "Carbon dioxide emissions" ~ "co2_emissions_metric_tons",
    hoja == "Broadband - fixed subscriptions" ~ "broadband_subscriptions",
    hoja == "Telephones - fixed lines" ~ "telephones_fixed_lines",
    hoja == "Telephones - mobile cellular" ~ "telephones_mobile",
    TRUE ~ names(df)[2]
  )
  
  return(df)
})

# Combinar datos
combined_data <- reduce(datos, full_join, by = "country")

# Filtrar variables para regresión
reg_data <- combined_data %>%
  select(
    country,
    # Variables dependientes (ECON)
    inflation_rate_pct,
    public_debt_pct_GDP,
    youth_unemployment_pct,
    # Variables independientes (COMMs y ENERGY)
    broadband_subscriptions,
    telephones_fixed_lines,
    telephones_mobile,
    electricity_losses_kWh,
    electricity_consumption_kWh,
    energy_consumption_per_capita_Btu,
    electricity_capacity_kW,
    co2_emissions_metric_tons
  ) %>%
  na.omit()

# Realizar las tres regresiones
model1 <- lm(inflation_rate_pct ~ 
               broadband_subscriptions +
               telephones_fixed_lines +
               telephones_mobile +
               electricity_losses_kWh +
               electricity_consumption_kWh +
               energy_consumption_per_capita_Btu +
               electricity_capacity_kW +
               co2_emissions_metric_tons, 
             data = reg_data)

model2 <- lm(public_debt_pct_GDP ~ 
               broadband_subscriptions +
               telephones_fixed_lines +
               telephones_mobile +
               electricity_losses_kWh +
               electricity_consumption_kWh +
               energy_consumption_per_capita_Btu +
               electricity_capacity_kW +
               co2_emissions_metric_tons, 
             data = reg_data)

model3 <- lm(youth_unemployment_pct ~ 
               broadband_subscriptions +
               telephones_fixed_lines +
               telephones_mobile +
               electricity_losses_kWh +
               electricity_consumption_kWh +
               energy_consumption_per_capita_Btu +
               electricity_capacity_kW +
               co2_emissions_metric_tons, 
             data = reg_data)

# Extraer coeficientes relevantes
results <- bind_rows(
  tidy(model1) %>% filter(term %in% c("telephones_fixed_lines", "co2_emissions_metric_tons")) %>% mutate(model = "Inflation"),
  tidy(model2) %>% filter(term %in% c("telephones_fixed_lines", "co2_emissions_metric_tons")) %>% mutate(model = "Public Debt"),
  tidy(model3) %>% filter(term %in% c("telephones_fixed_lines", "co2_emissions_metric_tons")) %>% mutate(model = "Youth Unemployment")
)

# Analizar resultados
effects <- results %>%
  mutate(
    direction = ifelse(estimate < 0, "Inverso", "Directo"),
    significant = p.value < 0.05
  ) %>%
  select(model, term, estimate, direction, significant)

# Mostrar resultados
print(effects)

## # A tibble: 6 × 5
##   model              term                        estimate direction significant
##   <chr>              <chr>                          <dbl> <chr>     <lgl>      
## 1 Inflation          telephones_fixed_lines    -0.000367  Inverso   FALSE      
## 2 Inflation          co2_emissions_metric_tons -0.0000101 Inverso   FALSE      
## 3 Public Debt        telephones_fixed_lines    -0.000204  Inverso   FALSE      
## 4 Public Debt        co2_emissions_metric_tons  0.0000113 Directo   FALSE      
## 5 Youth Unemployment telephones_fixed_lines     0.000514  Directo   FALSE      
## 6 Youth Unemployment co2_emissions_metric_tons  0.0000103 Directo   FALSE

# Evaluar las afirmaciones
a_check <- effects %>%
  filter(model == "Inflation") %>%
  summarise(check = all(direction == "Inverso" & significant)) %>%
  pull(check)

b_check <- effects %>%
  filter(model == "Public Debt") %>%
  summarise(check = all(direction == "Inverso" & significant)) %>%
  pull(check)

c_check <- effects %>%
  filter(model == "Youth Unemployment") %>%
  summarise(check = all(direction == "Directo" & significant)) %>%
  pull(check)

d_check <- all(!effects$significant)

e_check <- any(effects$significant)

cat("\nResultado de las afirmaciones:")

## 
## Resultado de las afirmaciones:

cat("\na) Efecto inverso en inflación:", a_check)

## 
## a) Efecto inverso en inflación: FALSE

cat("\nb) Efecto inverso en deuda pública:", b_check)

## 
## b) Efecto inverso en deuda pública: FALSE

cat("\nc) Efecto directo en desempleo juvenil:", c_check)

## 
## c) Efecto directo en desempleo juvenil: FALSE

cat("\nd) Sin efecto en ninguna:", d_check)

## 
## d) Sin efecto en ninguna: TRUE

cat("\ne) Efecto en al menos una:", e_check)

## 
## e) Efecto en al menos una: FALSE

# Determinar respuesta correcta
answer <- if(a_check && b_check && c_check) {
  "a, b y c son verdaderas"
} else if(e_check) {
  "e) fixlines y CO2 tienen efecto en al menos una regresión"
} else if(d_check) {
  "d) fixlines y CO2 no tienen efecto en ninguna regresión"
} else {
  "Algunas afirmaciones son verdaderas pero no todas"
}

cat("\n\nRespuesta final:", answer)

## 
## 
## Respuesta final: d) fixlines y CO2 no tienen efecto en ninguna regresión

examen final

Steve Grimaldo

2025-06-27