library(AER)
## Cargando paquete requerido: car
## Cargando paquete requerido: carData
## Cargando paquete requerido: lmtest
## Cargando paquete requerido: zoo
##
## Adjuntando el paquete: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Cargando paquete requerido: sandwich
## Cargando paquete requerido: survival
library(rio)
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(arm)
## Cargando paquete requerido: MASS
##
## Adjuntando el paquete: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Cargando paquete requerido: Matrix
## Cargando paquete requerido: lme4
##
## Adjuntando el paquete: 'lme4'
## The following object is masked from 'package:rio':
##
## factorize
##
## arm (Version 1.14-4, built: 2024-4-1)
## Working directory is F:/PUCP/PUCP Sociales/Ciclo 6/Estadistica 2/Examen Final
##
## Adjuntando el paquete: 'arm'
## The following object is masked from 'package:car':
##
## logit
library(Matrix)
library(modelsummary)
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
## backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
##
## Revert to `kableExtra` for one session:
##
## options(modelsummary_factory_default = 'kableExtra')
## options(modelsummary_factory_latex = 'kableExtra')
## options(modelsummary_factory_html = 'kableExtra')
##
## Silence this message forever:
##
## config_modelsummary(startup_message = FALSE)
library(kableExtra)
##
## Adjuntando el paquete: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(lme4)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.5.1 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ kableExtra::group_rows() masks dplyr::group_rows()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tidyr::pack() masks Matrix::pack()
## ✖ dplyr::recode() masks car::recode()
## ✖ MASS::select() masks dplyr::select()
## ✖ purrr::some() masks car::some()
## ✖ tidyr::unpack() masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
data2 = import("data2.xlsx")
## New names:
## • `` -> `...1`
names(data2)
## [1] "...1" "key"
## [3] "Código" "pared1_Ladrillo"
## [5] "pared2_Piedra" "pared3_Adobe"
## [7] "pared4_Tapia" "pared5_Quincha"
## [9] "pared6_Piedra" "pared7_Madera"
## [11] "pared8_Triplay" "pared9_Otro"
## [13] "pared10_Total" "techo1_Concreto"
## [15] "techo2_Madera" "techo3_Tejas"
## [17] "techo4_Planchas" "techo5_Caña"
## [19] "techo6_Triplay" "techo7_Paja"
## [21] "techo8_Otro" "techo9_Total"
## [23] "piso1_Parquet" "piso2_Láminas"
## [25] "piso3_Losetas" "piso4_Madera"
## [27] "piso5_Cemento" "piso6_Tierra"
## [29] "piso7_Otro" "piso8_Total"
## [31] "agua1_Red" "agua2_Red_fueraVivienda"
## [33] "agua3_Pilón" "agua4_Camión"
## [35] "agua5_Pozo" "agua6_Manantial"
## [37] "agua7_Río" "agua8_Otro"
## [39] "agua9_Vecino" "agua10_Total"
## [41] "elec1_Sí" "elec2_No"
## [43] "elec3_Total" "departamento"
## [45] "provincia" "Castillo"
## [47] "Keiko" "ganaCastillo"
## [49] "countPositivos" "countFallecidos"
str(data2)
## 'data.frame': 196 obs. of 50 variables:
## $ ...1 : num 1 2 3 4 5 6 7 8 9 10 ...
## $ key : chr "AMAZONAS+BAGUA" "AMAZONAS+BONGARA" "AMAZONAS+CHACHAPOYAS" "AMAZONAS+CONDORCANQUI" ...
## $ Código : num 102 103 101 104 105 106 107 202 203 204 ...
## $ pared1_Ladrillo : num 4633 1602 3782 291 430 ...
## $ pared2_Piedra : num 46 9 22 7 7 7 35 1 0 3 ...
## $ pared3_Adobe : num 6639 2729 5881 672 5217 ...
## $ pared4_Tapia : num 222 240 2476 8 6052 ...
## $ pared5_Quincha : num 2518 157 309 386 346 ...
## $ pared6_Piedra : num 127 36 168 7 54 28 518 65 7 6 ...
## $ pared7_Madera : num 4484 2505 1270 8145 606 ...
## $ pared8_Triplay : num 851 30 91 200 45 24 210 18 0 1 ...
## $ pared9_Otro : num 0 0 0 0 0 0 0 0 0 0 ...
## $ pared10_Total : num 19520 7308 13999 9716 12757 ...
## $ techo1_Concreto : num 2187 692 2262 56 187 ...
## $ techo2_Madera : num 294 75 160 188 43 48 340 57 12 8 ...
## $ techo3_Tejas : num 179 382 3393 177 3071 ...
## $ techo4_Planchas : num 13186 6084 8005 2036 9343 ...
## $ techo5_Caña : num 160 38 50 15 26 15 196 10 8 5 ...
## $ techo6_Triplay : num 106 5 14 10 12 5 62 17 4 3 ...
## $ techo7_Paja : num 3408 32 115 7234 75 ...
## $ techo8_Otro : num 0 0 0 0 0 0 0 0 0 0 ...
## $ techo9_Total : num 19520 7308 13999 9716 12757 ...
## $ piso1_Parquet : num 6 5 23 2 4 3 20 0 0 5 ...
## $ piso2_Láminas : num 19 2 36 0 0 4 32 0 0 1 ...
## $ piso3_Losetas : num 647 165 1077 20 46 ...
## $ piso4_Madera : num 157 132 240 1523 295 ...
## $ piso5_Cemento : num 7121 2917 6189 943 1911 ...
## $ piso6_Tierra : num 11569 4087 6434 7228 10501 ...
## $ piso7_Otro : num 1 0 0 0 0 0 0 0 0 0 ...
## $ piso8_Total : num 19520 7308 13999 9716 12757 ...
## $ agua1_Red : num 9429 4569 10647 1307 7172 ...
## $ agua2_Red_fueraVivienda: num 4392 1497 1619 867 3097 ...
## $ agua3_Pilón : num 793 215 184 1003 1112 ...
## $ agua4_Camión : num 59 0 49 2 0 0 117 0 0 0 ...
## $ agua5_Pozo : num 1792 474 876 2564 819 ...
## $ agua6_Manantial : num 270 67 92 431 132 211 471 121 61 27 ...
## $ agua7_Río : num 2648 388 488 3428 369 ...
## $ agua8_Otro : num 56 61 24 80 9 29 104 2 1 6 ...
## $ agua9_Vecino : num 81 37 20 34 47 8 177 9 4 6 ...
## $ agua10_Total : num 19520 7308 13999 9716 12757 ...
## $ elec1_Sí : num 13204 6025 12248 1792 10886 ...
## $ elec2_No : num 6316 1283 1751 7924 1871 ...
## $ elec3_Total : num 19520 7308 13999 9716 12757 ...
## $ departamento : chr "AMAZONAS" "AMAZONAS" "AMAZONAS" "AMAZONAS" ...
## $ provincia : chr "BAGUA" "BONGARA" "CHACHAPOYAS" "CONDORCANQUI" ...
## $ Castillo : num 25629 8374 15671 13154 12606 ...
## $ Keiko : num 10770 5209 10473 1446 7840 ...
## $ ganaCastillo : num 1 1 1 1 1 1 1 1 1 1 ...
## $ countPositivos : num 8126 389 2174 3481 456 ...
## $ countFallecidos : num 462 72 281 111 88 60 336 26 31 21 ...
data2$agua1_Red_pct = data2$agua1_Red/data2$agua10_Total
data2$razon_keiko_castillo <- data2$Keiko / data2$Castillo
data2$tasa_fallecidos_por_1000 <- (data2$countFallecidos / data2$countPositivos) * 1000
data2 <- data2 %>%
slice(-135) #fila específica
names(data2)
## [1] "...1" "key"
## [3] "Código" "pared1_Ladrillo"
## [5] "pared2_Piedra" "pared3_Adobe"
## [7] "pared4_Tapia" "pared5_Quincha"
## [9] "pared6_Piedra" "pared7_Madera"
## [11] "pared8_Triplay" "pared9_Otro"
## [13] "pared10_Total" "techo1_Concreto"
## [15] "techo2_Madera" "techo3_Tejas"
## [17] "techo4_Planchas" "techo5_Caña"
## [19] "techo6_Triplay" "techo7_Paja"
## [21] "techo8_Otro" "techo9_Total"
## [23] "piso1_Parquet" "piso2_Láminas"
## [25] "piso3_Losetas" "piso4_Madera"
## [27] "piso5_Cemento" "piso6_Tierra"
## [29] "piso7_Otro" "piso8_Total"
## [31] "agua1_Red" "agua2_Red_fueraVivienda"
## [33] "agua3_Pilón" "agua4_Camión"
## [35] "agua5_Pozo" "agua6_Manantial"
## [37] "agua7_Río" "agua8_Otro"
## [39] "agua9_Vecino" "agua10_Total"
## [41] "elec1_Sí" "elec2_No"
## [43] "elec3_Total" "departamento"
## [45] "provincia" "Castillo"
## [47] "Keiko" "ganaCastillo"
## [49] "countPositivos" "countFallecidos"
## [51] "agua1_Red_pct" "razon_keiko_castillo"
## [53] "tasa_fallecidos_por_1000"
library(BBmisc)
##
## Adjuntando el paquete: 'BBmisc'
## The following object is masked from 'package:lme4':
##
## namedList
## The following objects are masked from 'package:dplyr':
##
## coalesce, collapse, symdiff
## The following object is masked from 'package:base':
##
## isFALSE
data2[,c(51:53)]=normalize(data2[,c(51:53)],method='range',range=c(0,10))
data2_nueva <- data2[, c("provincia", "agua1_Red_pct", "razon_keiko_castillo","tasa_fallecidos_por_1000")] #puedo chancar la misma data
row.names(data2_nueva)=data2_nueva$provincia
str(data2_nueva)
## 'data.frame': 195 obs. of 4 variables:
## $ provincia : chr "BAGUA" "BONGARA" "CHACHAPOYAS" "CONDORCANQUI" ...
## $ agua1_Red_pct : num 5.6 7.25 8.83 1.53 6.52 ...
## $ razon_keiko_castillo : num 1.356 2.072 2.236 0.255 2.072 ...
## $ tasa_fallecidos_por_1000: num 0.2304 0.9735 0.6499 0.0857 1.0192 ...
data2_nueva <- data2_nueva[, !names(data2_nueva) %in% c("provincia")]
library(cluster)
g.dist = daisy(data2_nueva, metric="gower")
## para PAM
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
fviz_nbclust(data2_nueva, pam,diss=g.dist,method = "gap_stat",k.max = 10,verbose = F)
library(kableExtra)
set.seed(123)
res.pam=pam(g.dist,3,cluster.only = F)
#nueva columna
data2_nueva$pam=res.pam$cluster
# ver
head(data2_nueva,15)%>%kbl()%>%kable_styling()
agua1_Red_pct | razon_keiko_castillo | tasa_fallecidos_por_1000 | pam | |
---|---|---|---|---|
BAGUA | 5.595250 | 1.3562051 | 0.2303939 | 1 |
BONGARA | 7.252007 | 2.0722832 | 0.9734590 | 2 |
CHACHAPOYAS | 8.829363 | 2.2364210 | 0.6499201 | 2 |
CONDORCANQUI | 1.533573 | 0.2552194 | 0.0857208 | 1 |
LUYA | 6.517758 | 2.0718633 | 1.0191922 | 2 |
RODRÍGUEZ DE MENDOZA | 6.845180 | 2.3106252 | 3.0616052 | 2 |
UTCUBAMBA | 5.642576 | 1.7316935 | 0.4202772 | 1 |
AIJA | 8.677843 | 2.0215359 | 1.8080112 | 2 |
ANTONIO RAYMONDI | 9.905313 | 0.4181713 | 3.2274421 | 2 |
ASUNCIÓN | 8.278577 | 0.8911624 | 1.9634107 | 2 |
BOLOGNESI | 8.392707 | 1.7079996 | 2.1996078 | 2 |
CARHUAZ | 8.482075 | 1.4880190 | 1.6120164 | 2 |
CARLOS FERMÍN FITZCARRALD | 7.686527 | 0.7969638 | 3.4190604 | 2 |
CASMA | 7.009008 | 4.7344858 | 2.0791648 | 3 |
CORONGO | 8.725067 | 2.2480185 | 2.8765216 | 2 |
fviz_silhouette(res.pam,print.summary = F)
##Jeararquico aglomerativo
## PARA JERARQUICO
fviz_nbclust(data2_nueva, hcut,diss=g.dist,method = "gap_stat",k.max = 10,verbose = F,hc_func = "agnes")
set.seed(123)
library(factoextra)
res.agnes<- hcut(g.dist, k = 5,hc_func='agnes',hc_method = "ward.D")
data2_nueva$agnes=res.agnes$cluster
# ver
head(data2_nueva,15)%>%kbl()%>%kable_styling()
agua1_Red_pct | razon_keiko_castillo | tasa_fallecidos_por_1000 | pam | agnes | |
---|---|---|---|---|---|
BAGUA | 5.595250 | 1.3562051 | 0.2303939 | 1 | 1 |
BONGARA | 7.252007 | 2.0722832 | 0.9734590 | 2 | 2 |
CHACHAPOYAS | 8.829363 | 2.2364210 | 0.6499201 | 2 | 2 |
CONDORCANQUI | 1.533573 | 0.2552194 | 0.0857208 | 1 | 3 |
LUYA | 6.517758 | 2.0718633 | 1.0191922 | 2 | 2 |
RODRÍGUEZ DE MENDOZA | 6.845180 | 2.3106252 | 3.0616052 | 2 | 2 |
UTCUBAMBA | 5.642576 | 1.7316935 | 0.4202772 | 1 | 1 |
AIJA | 8.677843 | 2.0215359 | 1.8080112 | 2 | 2 |
ANTONIO RAYMONDI | 9.905313 | 0.4181713 | 3.2274421 | 2 | 2 |
ASUNCIÓN | 8.278577 | 0.8911624 | 1.9634107 | 2 | 2 |
BOLOGNESI | 8.392707 | 1.7079996 | 2.1996078 | 2 | 2 |
CARHUAZ | 8.482075 | 1.4880190 | 1.6120164 | 2 | 2 |
CARLOS FERMÍN FITZCARRALD | 7.686527 | 0.7969638 | 3.4190604 | 2 | 2 |
CASMA | 7.009008 | 4.7344858 | 2.0791648 | 3 | 4 |
CORONGO | 8.725067 | 2.2480185 | 2.8765216 | 2 | 2 |
fviz_silhouette(res.agnes,print.summary = F)
##Jerarquico divisivo