library(AER)
## Cargando paquete requerido: car
## Cargando paquete requerido: carData
## Cargando paquete requerido: lmtest
## Cargando paquete requerido: zoo
## 
## Adjuntando el paquete: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Cargando paquete requerido: sandwich
## Cargando paquete requerido: survival
library(rio)
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(arm)
## Cargando paquete requerido: MASS
## 
## Adjuntando el paquete: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Cargando paquete requerido: Matrix
## Cargando paquete requerido: lme4
## 
## Adjuntando el paquete: 'lme4'
## The following object is masked from 'package:rio':
## 
##     factorize
## 
## arm (Version 1.14-4, built: 2024-4-1)
## Working directory is F:/PUCP/PUCP Sociales/Ciclo 6/Estadistica 2/Examen Final
## 
## Adjuntando el paquete: 'arm'
## The following object is masked from 'package:car':
## 
##     logit
library(Matrix)
library(modelsummary)
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
##   backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
## 
## Revert to `kableExtra` for one session:
## 
##   options(modelsummary_factory_default = 'kableExtra')
##   options(modelsummary_factory_latex = 'kableExtra')
##   options(modelsummary_factory_html = 'kableExtra')
## 
## Silence this message forever:
## 
##   config_modelsummary(startup_message = FALSE)
library(kableExtra)
## 
## Adjuntando el paquete: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(lme4)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ ggplot2   3.5.1     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::expand()          masks Matrix::expand()
## ✖ dplyr::filter()          masks stats::filter()
## ✖ kableExtra::group_rows() masks dplyr::group_rows()
## ✖ dplyr::lag()             masks stats::lag()
## ✖ tidyr::pack()            masks Matrix::pack()
## ✖ dplyr::recode()          masks car::recode()
## ✖ MASS::select()           masks dplyr::select()
## ✖ purrr::some()            masks car::some()
## ✖ tidyr::unpack()          masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
data2 = import("data2.xlsx")
## New names:
## • `` -> `...1`
names(data2)
##  [1] "...1"                    "key"                    
##  [3] "Código"                  "pared1_Ladrillo"        
##  [5] "pared2_Piedra"           "pared3_Adobe"           
##  [7] "pared4_Tapia"            "pared5_Quincha"         
##  [9] "pared6_Piedra"           "pared7_Madera"          
## [11] "pared8_Triplay"          "pared9_Otro"            
## [13] "pared10_Total"           "techo1_Concreto"        
## [15] "techo2_Madera"           "techo3_Tejas"           
## [17] "techo4_Planchas"         "techo5_Caña"            
## [19] "techo6_Triplay"          "techo7_Paja"            
## [21] "techo8_Otro"             "techo9_Total"           
## [23] "piso1_Parquet"           "piso2_Láminas"          
## [25] "piso3_Losetas"           "piso4_Madera"           
## [27] "piso5_Cemento"           "piso6_Tierra"           
## [29] "piso7_Otro"              "piso8_Total"            
## [31] "agua1_Red"               "agua2_Red_fueraVivienda"
## [33] "agua3_Pilón"             "agua4_Camión"           
## [35] "agua5_Pozo"              "agua6_Manantial"        
## [37] "agua7_Río"               "agua8_Otro"             
## [39] "agua9_Vecino"            "agua10_Total"           
## [41] "elec1_Sí"                "elec2_No"               
## [43] "elec3_Total"             "departamento"           
## [45] "provincia"               "Castillo"               
## [47] "Keiko"                   "ganaCastillo"           
## [49] "countPositivos"          "countFallecidos"
str(data2)
## 'data.frame':    196 obs. of  50 variables:
##  $ ...1                   : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ key                    : chr  "AMAZONAS+BAGUA" "AMAZONAS+BONGARA" "AMAZONAS+CHACHAPOYAS" "AMAZONAS+CONDORCANQUI" ...
##  $ Código                 : num  102 103 101 104 105 106 107 202 203 204 ...
##  $ pared1_Ladrillo        : num  4633 1602 3782 291 430 ...
##  $ pared2_Piedra          : num  46 9 22 7 7 7 35 1 0 3 ...
##  $ pared3_Adobe           : num  6639 2729 5881 672 5217 ...
##  $ pared4_Tapia           : num  222 240 2476 8 6052 ...
##  $ pared5_Quincha         : num  2518 157 309 386 346 ...
##  $ pared6_Piedra          : num  127 36 168 7 54 28 518 65 7 6 ...
##  $ pared7_Madera          : num  4484 2505 1270 8145 606 ...
##  $ pared8_Triplay         : num  851 30 91 200 45 24 210 18 0 1 ...
##  $ pared9_Otro            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ pared10_Total          : num  19520 7308 13999 9716 12757 ...
##  $ techo1_Concreto        : num  2187 692 2262 56 187 ...
##  $ techo2_Madera          : num  294 75 160 188 43 48 340 57 12 8 ...
##  $ techo3_Tejas           : num  179 382 3393 177 3071 ...
##  $ techo4_Planchas        : num  13186 6084 8005 2036 9343 ...
##  $ techo5_Caña            : num  160 38 50 15 26 15 196 10 8 5 ...
##  $ techo6_Triplay         : num  106 5 14 10 12 5 62 17 4 3 ...
##  $ techo7_Paja            : num  3408 32 115 7234 75 ...
##  $ techo8_Otro            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ techo9_Total           : num  19520 7308 13999 9716 12757 ...
##  $ piso1_Parquet          : num  6 5 23 2 4 3 20 0 0 5 ...
##  $ piso2_Láminas          : num  19 2 36 0 0 4 32 0 0 1 ...
##  $ piso3_Losetas          : num  647 165 1077 20 46 ...
##  $ piso4_Madera           : num  157 132 240 1523 295 ...
##  $ piso5_Cemento          : num  7121 2917 6189 943 1911 ...
##  $ piso6_Tierra           : num  11569 4087 6434 7228 10501 ...
##  $ piso7_Otro             : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ piso8_Total            : num  19520 7308 13999 9716 12757 ...
##  $ agua1_Red              : num  9429 4569 10647 1307 7172 ...
##  $ agua2_Red_fueraVivienda: num  4392 1497 1619 867 3097 ...
##  $ agua3_Pilón            : num  793 215 184 1003 1112 ...
##  $ agua4_Camión           : num  59 0 49 2 0 0 117 0 0 0 ...
##  $ agua5_Pozo             : num  1792 474 876 2564 819 ...
##  $ agua6_Manantial        : num  270 67 92 431 132 211 471 121 61 27 ...
##  $ agua7_Río              : num  2648 388 488 3428 369 ...
##  $ agua8_Otro             : num  56 61 24 80 9 29 104 2 1 6 ...
##  $ agua9_Vecino           : num  81 37 20 34 47 8 177 9 4 6 ...
##  $ agua10_Total           : num  19520 7308 13999 9716 12757 ...
##  $ elec1_Sí               : num  13204 6025 12248 1792 10886 ...
##  $ elec2_No               : num  6316 1283 1751 7924 1871 ...
##  $ elec3_Total            : num  19520 7308 13999 9716 12757 ...
##  $ departamento           : chr  "AMAZONAS" "AMAZONAS" "AMAZONAS" "AMAZONAS" ...
##  $ provincia              : chr  "BAGUA" "BONGARA" "CHACHAPOYAS" "CONDORCANQUI" ...
##  $ Castillo               : num  25629 8374 15671 13154 12606 ...
##  $ Keiko                  : num  10770 5209 10473 1446 7840 ...
##  $ ganaCastillo           : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ countPositivos         : num  8126 389 2174 3481 456 ...
##  $ countFallecidos        : num  462 72 281 111 88 60 336 26 31 21 ...
data2$agua1_Red_pct = data2$agua1_Red/data2$agua10_Total
data2$razon_keiko_castillo <- data2$Keiko / data2$Castillo
data2$tasa_fallecidos_por_1000 <- (data2$countFallecidos / data2$countPositivos) * 1000
data2 <- data2 %>%
  slice(-135)    #fila específica
names(data2)
##  [1] "...1"                     "key"                     
##  [3] "Código"                   "pared1_Ladrillo"         
##  [5] "pared2_Piedra"            "pared3_Adobe"            
##  [7] "pared4_Tapia"             "pared5_Quincha"          
##  [9] "pared6_Piedra"            "pared7_Madera"           
## [11] "pared8_Triplay"           "pared9_Otro"             
## [13] "pared10_Total"            "techo1_Concreto"         
## [15] "techo2_Madera"            "techo3_Tejas"            
## [17] "techo4_Planchas"          "techo5_Caña"             
## [19] "techo6_Triplay"           "techo7_Paja"             
## [21] "techo8_Otro"              "techo9_Total"            
## [23] "piso1_Parquet"            "piso2_Láminas"           
## [25] "piso3_Losetas"            "piso4_Madera"            
## [27] "piso5_Cemento"            "piso6_Tierra"            
## [29] "piso7_Otro"               "piso8_Total"             
## [31] "agua1_Red"                "agua2_Red_fueraVivienda" 
## [33] "agua3_Pilón"              "agua4_Camión"            
## [35] "agua5_Pozo"               "agua6_Manantial"         
## [37] "agua7_Río"                "agua8_Otro"              
## [39] "agua9_Vecino"             "agua10_Total"            
## [41] "elec1_Sí"                 "elec2_No"                
## [43] "elec3_Total"              "departamento"            
## [45] "provincia"                "Castillo"                
## [47] "Keiko"                    "ganaCastillo"            
## [49] "countPositivos"           "countFallecidos"         
## [51] "agua1_Red_pct"            "razon_keiko_castillo"    
## [53] "tasa_fallecidos_por_1000"
library(BBmisc)
## 
## Adjuntando el paquete: 'BBmisc'
## The following object is masked from 'package:lme4':
## 
##     namedList
## The following objects are masked from 'package:dplyr':
## 
##     coalesce, collapse, symdiff
## The following object is masked from 'package:base':
## 
##     isFALSE
data2[,c(51:53)]=normalize(data2[,c(51:53)],method='range',range=c(0,10))
data2_nueva <- data2[, c("provincia", "agua1_Red_pct", "razon_keiko_castillo","tasa_fallecidos_por_1000")]  #puedo chancar la misma data
row.names(data2_nueva)=data2_nueva$provincia
str(data2_nueva)
## 'data.frame':    195 obs. of  4 variables:
##  $ provincia               : chr  "BAGUA" "BONGARA" "CHACHAPOYAS" "CONDORCANQUI" ...
##  $ agua1_Red_pct           : num  5.6 7.25 8.83 1.53 6.52 ...
##  $ razon_keiko_castillo    : num  1.356 2.072 2.236 0.255 2.072 ...
##  $ tasa_fallecidos_por_1000: num  0.2304 0.9735 0.6499 0.0857 1.0192 ...
data2_nueva <- data2_nueva[, !names(data2_nueva) %in% c("provincia")]
library(cluster)
g.dist = daisy(data2_nueva, metric="gower")

Partición

cantidad de clusters

## para PAM

library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
fviz_nbclust(data2_nueva, pam,diss=g.dist,method = "gap_stat",k.max = 10,verbose = F)

library(kableExtra)
set.seed(123)
res.pam=pam(g.dist,3,cluster.only = F)

#nueva columna
data2_nueva$pam=res.pam$cluster

# ver

head(data2_nueva,15)%>%kbl()%>%kable_styling()
agua1_Red_pct razon_keiko_castillo tasa_fallecidos_por_1000 pam
BAGUA 5.595250 1.3562051 0.2303939 1
BONGARA 7.252007 2.0722832 0.9734590 2
CHACHAPOYAS 8.829363 2.2364210 0.6499201 2
CONDORCANQUI 1.533573 0.2552194 0.0857208 1
LUYA 6.517758 2.0718633 1.0191922 2
RODRÍGUEZ DE MENDOZA 6.845180 2.3106252 3.0616052 2
UTCUBAMBA 5.642576 1.7316935 0.4202772 1
AIJA 8.677843 2.0215359 1.8080112 2
ANTONIO RAYMONDI 9.905313 0.4181713 3.2274421 2
ASUNCIÓN 8.278577 0.8911624 1.9634107 2
BOLOGNESI 8.392707 1.7079996 2.1996078 2
CARHUAZ 8.482075 1.4880190 1.6120164 2
CARLOS FERMÍN FITZCARRALD 7.686527 0.7969638 3.4190604 2
CASMA 7.009008 4.7344858 2.0791648 3
CORONGO 8.725067 2.2480185 2.8765216 2
fviz_silhouette(res.pam,print.summary = F)

##Jeararquico aglomerativo

## PARA JERARQUICO

fviz_nbclust(data2_nueva, hcut,diss=g.dist,method = "gap_stat",k.max = 10,verbose = F,hc_func = "agnes")

set.seed(123)
library(factoextra)

res.agnes<- hcut(g.dist, k = 5,hc_func='agnes',hc_method = "ward.D")

data2_nueva$agnes=res.agnes$cluster

# ver

head(data2_nueva,15)%>%kbl()%>%kable_styling()
agua1_Red_pct razon_keiko_castillo tasa_fallecidos_por_1000 pam agnes
BAGUA 5.595250 1.3562051 0.2303939 1 1
BONGARA 7.252007 2.0722832 0.9734590 2 2
CHACHAPOYAS 8.829363 2.2364210 0.6499201 2 2
CONDORCANQUI 1.533573 0.2552194 0.0857208 1 3
LUYA 6.517758 2.0718633 1.0191922 2 2
RODRÍGUEZ DE MENDOZA 6.845180 2.3106252 3.0616052 2 2
UTCUBAMBA 5.642576 1.7316935 0.4202772 1 1
AIJA 8.677843 2.0215359 1.8080112 2 2
ANTONIO RAYMONDI 9.905313 0.4181713 3.2274421 2 2
ASUNCIÓN 8.278577 0.8911624 1.9634107 2 2
BOLOGNESI 8.392707 1.7079996 2.1996078 2 2
CARHUAZ 8.482075 1.4880190 1.6120164 2 2
CARLOS FERMÍN FITZCARRALD 7.686527 0.7969638 3.4190604 2 2
CASMA 7.009008 4.7344858 2.0791648 3 4
CORONGO 8.725067 2.2480185 2.8765216 2 2
fviz_silhouette(res.agnes,print.summary = F)

##Jerarquico divisivo