# Set the working directory so that the relative path "dataOK.xlsx" passed to
# import() later in this script resolves inside this folder.
# NOTE(review): the path points into one user's home directory, so the script
# will stop here on any machine without that folder. Consider an RStudio
# project (or a path built relative to the project root) instead of setwd().
setwd("~/FINAL EAP2")

library(rio)
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(magrittr)
## 
## Attaching package: 'magrittr'
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
library(polycor)
library(psych)
## 
## Attaching package: 'psych'
## 
## The following object is masked from 'package:polycor':
## 
##     polyserial
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(cluster)
library(matrixcalc)
library(GPArotation)
## 
## Attaching package: 'GPArotation'
## 
## The following objects are masked from 'package:psych':
## 
##     equamax, varimin
library(BBmisc)
## 
## Attaching package: 'BBmisc'
## 
## The following objects are masked from 'package:dplyr':
## 
##     coalesce, collapse, symdiff
## 
## The following object is masked from 'package:base':
## 
##     isFALSE
# Read the Excel workbook (path is relative to the working directory).
data_2 <- rio::import(file = "dataOK.xlsx")
## New names:
## • `` -> `...1`
# Show which columns are available for the analysis.
colnames(data_2)
##  [1] "...1"                    "key"                    
##  [3] "Código"                  "pared1_Ladrillo"        
##  [5] "pared2_Piedra"           "pared3_Adobe"           
##  [7] "pared4_Tapia"            "pared5_Quincha"         
##  [9] "pared6_Piedra"           "pared7_Madera"          
## [11] "pared8_Triplay"          "pared9_Otro"            
## [13] "pared10_Total"           "techo1_Concreto"        
## [15] "techo2_Madera"           "techo3_Tejas"           
## [17] "techo4_Planchas"         "techo5_Caña"            
## [19] "techo6_Triplay"          "techo7_Paja"            
## [21] "techo8_Otro"             "techo9_Total"           
## [23] "piso1_Parquet"           "piso2_Láminas"          
## [25] "piso3_Losetas"           "piso4_Madera"           
## [27] "piso5_Cemento"           "piso6_Tierra"           
## [29] "piso7_Otro"              "piso8_Total"            
## [31] "agua1_Red"               "agua2_Red_fueraVivienda"
## [33] "agua3_Pilón"             "agua4_Camión"           
## [35] "agua5_Pozo"              "agua6_Manantial"        
## [37] "agua7_Río"               "agua8_Otro"             
## [39] "agua9_Vecino"            "agua10_Total"           
## [41] "elec1_Sí"                "elec2_No"               
## [43] "elec3_Total"             "departamento"           
## [45] "provincia"               "Castillo"               
## [47] "Keiko"                   "ganaCastillo"           
## [49] "covidPositivos"          "covidFallecidos"
# Keep only the three clustering variables and discard every row that has a
# missing value in any of them.
data_selected <- data_2 %>%
  select(agua1_Red, Keiko, covidFallecidos) %>%
  filter(if_all(everything(), ~ !is.na(.x)))

# Standardise the data: centre and scale each column (scale() defaults made
# explicit) so the variables are comparable when distances are computed.
data_normalized <- scale(data_selected, center = TRUE, scale = TRUE)
library(factoextra)
# Gap-statistic plot to suggest a number of clusters (up to 10) for PAM.
# The former `diss = g.dist` argument was removed: `g.dist` is never created in
# this script, and the call only ran because the argument was never evaluated.
# NOTE(review): fviz_nbclust() appears to use `diss` only for the "silhouette"
# and "wss" methods; if a custom dissimilarity is wanted, build it explicitly
# (e.g. with cluster::daisy()) and confirm the method actually consumes it.
fviz_nbclust(
  data_normalized,
  pam,
  method = "gap_stat",
  k.max = 10,
  verbose = FALSE
)

## For hierarchical clustering

# Same gap-statistic search, using hcut() with the agglomerative (AGNES)
# algorithm; `hc_func` is forwarded to hcut() through `...`.
fviz_nbclust(
  data_normalized,
  hcut,
  method = "gap_stat",
  k.max = 10,
  verbose = FALSE,
  hc_func = "agnes"
)

# Divisive hierarchical clustering (DIANA) on the standardised data
res.diana <- diana(x = data_normalized)

# Draw the divisive dendrogram horizontally with slightly reduced label size
fviz_dend(
  x = res.diana,
  cex = 0.7,
  horiz = TRUE,
  main = "Dendrograma - DIANA"
)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
##   Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Gap-statistic plot to suggest a number of clusters (up to 10) for divisive
# hierarchical clustering; `hc_func = "diana"` is forwarded to hcut().
# The former `diss = g.dist` argument was removed: `g.dist` is never created in
# this script, and the call only ran because the argument was never evaluated.
# NOTE(review): confirm that `diss` is ignored for method = "gap_stat".
fviz_nbclust(
  data_normalized,
  hcut,
  method = "gap_stat",
  k.max = 10,
  verbose = FALSE,
  hc_func = "diana"
)