library(rio)
# Google Sheets link to the dataset; rio::import() fetches and reads it.
lkCSV <- "https://docs.google.com/spreadsheets/d/1PVIpzCuw3m5fOS1SfbD0DIfxvMP3KQGy65F9qHl4AE8/edit?gid=805156833#gid=805156833"
data <- rio::import(lkCSV)

# Se quiere organizar un indicador complejo de pobreza, asumiendo que las variables de calidad de techo, pared, piso y la disponibilidad de agua representan cada una un concepto o variable latente.

# Ud. hará el análisis con el archivo entregado (no considere las mediciones ‘otros’ en ningún caso, ni otras que su criterio de analista le dicte descartar).

# Al buscar construir el indicador complejo, Ud. encuentra:

# List every column available in the imported table.
colnames(data)
##  [1] "V1"                      "key"                    
##  [3] "Código"                  "pared1_Ladrillo"        
##  [5] "pared2_Piedra"           "pared3_Adobe"           
##  [7] "pared4_Tapia"            "pared5_Quincha"         
##  [9] "pared6_Piedra"           "pared7_Madera"          
## [11] "pared8_Triplay"          "pared9_Otro"            
## [13] "pared10_Total"           "techo1_Concreto"        
## [15] "techo2_Madera"           "techo3_Tejas"           
## [17] "techo4_Planchas"         "techo5_Caña"            
## [19] "techo6_Triplay"          "techo7_Paja"            
## [21] "techo8_Otro"             "techo9_Total"           
## [23] "piso1_Parquet"           "piso2_Láminas"          
## [25] "piso3_Losetas"           "piso4_Madera"           
## [27] "piso5_Cemento"           "piso6_Tierra"           
## [29] "piso7_Otro"              "piso8_Total"            
## [31] "agua1_Red"               "agua2_Red_fueraVivienda"
## [33] "agua3_Pilón"             "agua4_Camión"           
## [35] "agua5_Pozo"              "agua6_Manantial"        
## [37] "agua7_Río"               "agua8_Otro"             
## [39] "agua9_Vecino"            "agua10_Total"           
## [41] "elec1_Sí"                "elec2_No"               
## [43] "elec3_Total"             "departamento"           
## [45] "provincia"               "Castillo"               
## [47] "Keiko"                   "ganaCastillo"           
## [49] "countPositivos"          "countFallecidos"
# Columns left out of the factor analysis: row/ID fields, every "Otro"
# (other) category, the per-group "Total" columns, agua9_Vecino (analyst's
# choice), the electricity columns, and the geographic, electoral and
# case/death-count fields.
# A stray empty-string entry was dropped from this list: it matched no column.
dontselect <- c(
  "V1", "key", "Código",
  "pared9_Otro", "pared10_Total",
  "techo8_Otro", "techo9_Total",
  "piso7_Otro", "piso8_Total",
  "agua8_Otro", "agua9_Vecino", "agua10_Total",
  "elec1_Sí", "elec2_No", "elec3_Total",
  "departamento", "provincia",
  "Castillo", "Keiko", "ganaCastillo",
  "countPositivos", "countFallecidos"
)

# Everything that remains: the wall, roof, floor and water indicators.
select <- setdiff(names(data), dontselect)

# Working table used by the correlation and factor-analysis steps below.
DF <- data[, select]

# usaremos:
library(magrittr)
# Preview the first ten rows of the working table as a paged table.
rmarkdown::paged_table(head(DF, n = 10))
library(polycor)
# Correlation matrix computed by hetcor() over every column of DF,
# then displayed rounded to two decimals.
corMatrix <- polycor::hetcor(DF)[["correlations"]]
round(corMatrix, digits = 2)
##                         pared1_Ladrillo pared2_Piedra pared3_Adobe pared4_Tapia
## pared1_Ladrillo                    1.00          0.65         0.35        -0.03
## pared2_Piedra                      0.65          1.00         0.20        -0.05
## pared3_Adobe                       0.35          0.20         1.00         0.00
## pared4_Tapia                      -0.03         -0.05         0.00         1.00
## pared5_Quincha                     0.22          0.14         0.15        -0.11
## pared6_Piedra                      0.07          0.10         0.18         0.06
## pared7_Madera                      0.89          0.50         0.16        -0.09
## pared8_Triplay                     0.85          0.50         0.27        -0.09
## techo1_Concreto                    1.00          0.65         0.35        -0.02
## techo2_Madera                      0.96          0.55         0.26        -0.04
## techo3_Tejas                       0.13          0.04         0.45         0.41
## techo4_Planchas                    0.92          0.62         0.46        -0.02
## techo5_Caña                        0.44          0.24         0.38        -0.13
## techo6_Triplay                     0.84          0.49         0.29        -0.10
## techo7_Paja                        0.04          0.02         0.08        -0.12
## piso1_Parquet                      0.99          0.59         0.31        -0.02
## piso2_Láminas                      0.99          0.69         0.30        -0.03
## piso3_Losetas                      1.00          0.62         0.36        -0.03
## piso4_Madera                       0.57          0.32         0.12        -0.02
## piso5_Cemento                      1.00          0.67         0.38        -0.03
## piso6_Tierra                       0.69          0.45         0.72         0.14
## agua1_Red                          1.00          0.64         0.39        -0.02
## agua2_Red_fueraVivienda            0.99          0.59         0.40         0.01
## agua3_Pilón                        0.91          0.84         0.38        -0.02
## agua4_Camión                       0.99          0.61         0.33        -0.06
## agua5_Pozo                         0.37          0.25         0.45         0.02
## agua6_Manantial                   -0.05         -0.03         0.27         0.24
## agua7_Río                          0.01          0.00         0.06        -0.06
##                         pared5_Quincha pared6_Piedra pared7_Madera
## pared1_Ladrillo                   0.22          0.07          0.89
## pared2_Piedra                     0.14          0.10          0.50
## pared3_Adobe                      0.15          0.18          0.16
## pared4_Tapia                     -0.11          0.06         -0.09
## pared5_Quincha                    1.00         -0.03          0.17
## pared6_Piedra                    -0.03          1.00          0.04
## pared7_Madera                     0.17          0.04          1.00
## pared8_Triplay                    0.45          0.04          0.74
## techo1_Concreto                   0.20          0.07          0.88
## techo2_Madera                     0.18          0.05          0.94
## techo3_Tejas                     -0.05          0.00          0.03
## techo4_Planchas                   0.43          0.12          0.87
## techo5_Caña                       0.16         -0.05          0.31
## techo6_Triplay                    0.25          0.03          0.73
## techo7_Paja                      -0.06          0.26          0.24
## piso1_Parquet                     0.17          0.07          0.89
## piso2_Láminas                     0.18          0.07          0.88
## piso3_Losetas                     0.22          0.06          0.89
## piso4_Madera                      0.04          0.02          0.76
## piso5_Cemento                     0.24          0.07          0.89
## piso6_Tierra                      0.50          0.23          0.58
## agua1_Red                         0.24          0.07          0.89
## agua2_Red_fueraVivienda           0.21          0.10          0.90
## agua3_Pilón                       0.28          0.11          0.79
## agua4_Camión                      0.27          0.06          0.88
## agua5_Pozo                        0.07          0.24          0.45
## agua6_Manantial                  -0.04          0.39          0.00
## agua7_Río                         0.47          0.10          0.19
##                         pared8_Triplay techo1_Concreto techo2_Madera
## pared1_Ladrillo                   0.85            1.00          0.96
## pared2_Piedra                     0.50            0.65          0.55
## pared3_Adobe                      0.27            0.35          0.26
## pared4_Tapia                     -0.09           -0.02         -0.04
## pared5_Quincha                    0.45            0.20          0.18
## pared6_Piedra                     0.04            0.07          0.05
## pared7_Madera                     0.74            0.88          0.94
## pared8_Triplay                    1.00            0.84          0.79
## techo1_Concreto                   0.84            1.00          0.96
## techo2_Madera                     0.79            0.96          1.00
## techo3_Tejas                      0.04            0.13          0.10
## techo4_Planchas                   0.85            0.91          0.88
## techo5_Caña                       0.51            0.44          0.39
## techo6_Triplay                    0.93            0.83          0.80
## techo7_Paja                       0.01            0.04          0.08
## piso1_Parquet                     0.81            0.99          0.96
## piso2_Láminas                     0.81            0.99          0.96
## piso3_Losetas                     0.85            1.00          0.97
## piso4_Madera                      0.47            0.56          0.61
## piso5_Cemento                     0.85            0.99          0.96
## piso6_Tierra                      0.72            0.68          0.61
## agua1_Red                         0.85            1.00          0.96
## agua2_Red_fueraVivienda           0.82            0.99          0.96
## agua3_Pilón                       0.79            0.91          0.86
## agua4_Camión                      0.89            0.99          0.95
## agua5_Pozo                        0.29            0.37          0.36
## agua6_Manantial                  -0.08           -0.05         -0.05
## agua7_Río                         0.22           -0.02          0.00
##                         techo3_Tejas techo4_Planchas techo5_Caña techo6_Triplay
## pared1_Ladrillo                 0.13            0.92        0.44           0.84
## pared2_Piedra                   0.04            0.62        0.24           0.49
## pared3_Adobe                    0.45            0.46        0.38           0.29
## pared4_Tapia                    0.41           -0.02       -0.13          -0.10
## pared5_Quincha                 -0.05            0.43        0.16           0.25
## pared6_Piedra                   0.00            0.12       -0.05           0.03
## pared7_Madera                   0.03            0.87        0.31           0.73
## pared8_Triplay                  0.04            0.85        0.51           0.93
## techo1_Concreto                 0.13            0.91        0.44           0.83
## techo2_Madera                   0.10            0.88        0.39           0.80
## techo3_Tejas                    1.00            0.09       -0.05           0.03
## techo4_Planchas                 0.09            1.00        0.37           0.76
## techo5_Caña                    -0.05            0.37        1.00           0.66
## techo6_Triplay                  0.03            0.76        0.66           1.00
## techo7_Paja                    -0.13            0.13       -0.11          -0.02
## piso1_Parquet                   0.13            0.89        0.38           0.80
## piso2_Láminas                   0.12            0.89        0.38           0.80
## piso3_Losetas                   0.14            0.92        0.44           0.84
## piso4_Madera                    0.16            0.63        0.14           0.43
## piso5_Cemento                   0.13            0.93        0.47           0.85
## piso6_Tierra                    0.32            0.84        0.36           0.62
## agua1_Red                       0.16            0.93        0.46           0.84
## agua2_Red_fueraVivienda         0.19            0.92        0.40           0.81
## agua3_Pilón                     0.11            0.90        0.38           0.76
## agua4_Camión                    0.10            0.91        0.46           0.88
## agua5_Pozo                      0.04            0.51        0.15           0.26
## agua6_Manantial                 0.19            0.06       -0.13          -0.10
## agua7_Río                      -0.10            0.27       -0.03           0.04
##                         techo7_Paja piso1_Parquet piso2_Láminas piso3_Losetas
## pared1_Ladrillo                0.04          0.99          0.99          1.00
## pared2_Piedra                  0.02          0.59          0.69          0.62
## pared3_Adobe                   0.08          0.31          0.30          0.36
## pared4_Tapia                  -0.12         -0.02         -0.03         -0.03
## pared5_Quincha                -0.06          0.17          0.18          0.22
## pared6_Piedra                  0.26          0.07          0.07          0.06
## pared7_Madera                  0.24          0.89          0.88          0.89
## pared8_Triplay                 0.01          0.81          0.81          0.85
## techo1_Concreto                0.04          0.99          0.99          1.00
## techo2_Madera                  0.08          0.96          0.96          0.97
## techo3_Tejas                  -0.13          0.13          0.12          0.14
## techo4_Planchas                0.13          0.89          0.89          0.92
## techo5_Caña                   -0.11          0.38          0.38          0.44
## techo6_Triplay                -0.02          0.80          0.80          0.84
## techo7_Paja                    1.00          0.05          0.05          0.04
## piso1_Parquet                  0.05          1.00          0.99          0.99
## piso2_Láminas                  0.05          0.99          1.00          0.99
## piso3_Losetas                  0.04          0.99          0.99          1.00
## piso4_Madera                   0.34          0.57          0.56          0.57
## piso5_Cemento                  0.04          0.98          0.98          0.99
## piso6_Tierra                   0.20          0.63          0.64          0.68
## agua1_Red                      0.04          0.98          0.99          1.00
## agua2_Red_fueraVivienda        0.06          0.99          0.98          0.99
## agua3_Pilón                    0.08          0.87          0.92          0.90
## agua4_Camión                   0.03          0.98          0.98          0.99
## agua5_Pozo                     0.51          0.35          0.35          0.35
## agua6_Manantial                0.33         -0.04         -0.05         -0.06
## agua7_Río                      0.43         -0.03         -0.03          0.01
##                         piso4_Madera piso5_Cemento piso6_Tierra agua1_Red
## pared1_Ladrillo                 0.57          1.00         0.69      1.00
## pared2_Piedra                   0.32          0.67         0.45      0.64
## pared3_Adobe                    0.12          0.38         0.72      0.39
## pared4_Tapia                   -0.02         -0.03         0.14     -0.02
## pared5_Quincha                  0.04          0.24         0.50      0.24
## pared6_Piedra                   0.02          0.07         0.23      0.07
## pared7_Madera                   0.76          0.89         0.58      0.89
## pared8_Triplay                  0.47          0.85         0.72      0.85
## techo1_Concreto                 0.56          0.99         0.68      1.00
## techo2_Madera                   0.61          0.96         0.61      0.96
## techo3_Tejas                    0.16          0.13         0.32      0.16
## techo4_Planchas                 0.63          0.93         0.84      0.93
## techo5_Caña                     0.14          0.47         0.36      0.46
## techo6_Triplay                  0.43          0.85         0.62      0.84
## techo7_Paja                     0.34          0.04         0.20      0.04
## piso1_Parquet                   0.57          0.98         0.63      0.98
## piso2_Láminas                   0.56          0.98         0.64      0.99
## piso3_Losetas                   0.57          0.99         0.68      1.00
## piso4_Madera                    1.00          0.58         0.38      0.58
## piso5_Cemento                   0.58          1.00         0.71      1.00
## piso6_Tierra                    0.38          0.71         1.00      0.72
## agua1_Red                       0.58          1.00         0.72      1.00
## agua2_Red_fueraVivienda         0.59          0.98         0.72      0.99
## agua3_Pilón                     0.51          0.92         0.73      0.91
## agua4_Camión                    0.56          0.98         0.69      0.99
## agua5_Pozo                      0.43          0.40         0.59      0.38
## agua6_Manantial                -0.01         -0.05         0.30     -0.05
## agua7_Río                       0.35          0.02         0.37      0.02
##                         agua2_Red_fueraVivienda agua3_Pilón agua4_Camión
## pared1_Ladrillo                            0.99        0.91         0.99
## pared2_Piedra                              0.59        0.84         0.61
## pared3_Adobe                               0.40        0.38         0.33
## pared4_Tapia                               0.01       -0.02        -0.06
## pared5_Quincha                             0.21        0.28         0.27
## pared6_Piedra                              0.10        0.11         0.06
## pared7_Madera                              0.90        0.79         0.88
## pared8_Triplay                             0.82        0.79         0.89
## techo1_Concreto                            0.99        0.91         0.99
## techo2_Madera                              0.96        0.86         0.95
## techo3_Tejas                               0.19        0.11         0.10
## techo4_Planchas                            0.92        0.90         0.91
## techo5_Caña                                0.40        0.38         0.46
## techo6_Triplay                             0.81        0.76         0.88
## techo7_Paja                                0.06        0.08         0.03
## piso1_Parquet                              0.99        0.87         0.98
## piso2_Láminas                              0.98        0.92         0.98
## piso3_Losetas                              0.99        0.90         0.99
## piso4_Madera                               0.59        0.51         0.56
## piso5_Cemento                              0.98        0.92         0.98
## piso6_Tierra                               0.72        0.73         0.69
## agua1_Red                                  0.99        0.91         0.99
## agua2_Red_fueraVivienda                    1.00        0.88         0.97
## agua3_Pilón                                0.88        1.00         0.89
## agua4_Camión                               0.97        0.89         1.00
## agua5_Pozo                                 0.39        0.40         0.34
## agua6_Manantial                            0.03        0.01        -0.07
## agua7_Río                                  0.01        0.10         0.03
##                         agua5_Pozo agua6_Manantial agua7_Río
## pared1_Ladrillo               0.37           -0.05      0.01
## pared2_Piedra                 0.25           -0.03      0.00
## pared3_Adobe                  0.45            0.27      0.06
## pared4_Tapia                  0.02            0.24     -0.06
## pared5_Quincha                0.07           -0.04      0.47
## pared6_Piedra                 0.24            0.39      0.10
## pared7_Madera                 0.45            0.00      0.19
## pared8_Triplay                0.29           -0.08      0.22
## techo1_Concreto               0.37           -0.05     -0.02
## techo2_Madera                 0.36           -0.05      0.00
## techo3_Tejas                  0.04            0.19     -0.10
## techo4_Planchas               0.51            0.06      0.27
## techo5_Caña                   0.15           -0.13     -0.03
## techo6_Triplay                0.26           -0.10      0.04
## techo7_Paja                   0.51            0.33      0.43
## piso1_Parquet                 0.35           -0.04     -0.03
## piso2_Láminas                 0.35           -0.05     -0.03
## piso3_Losetas                 0.35           -0.06      0.01
## piso4_Madera                  0.43           -0.01      0.35
## piso5_Cemento                 0.40           -0.05      0.02
## piso6_Tierra                  0.59            0.30      0.37
## agua1_Red                     0.38           -0.05      0.02
## agua2_Red_fueraVivienda       0.39            0.03      0.01
## agua3_Pilón                   0.40            0.01      0.10
## agua4_Camión                  0.34           -0.07      0.03
## agua5_Pozo                    1.00            0.29      0.27
## agua6_Manantial               0.29            1.00      0.31
## agua7_Río                     0.27            0.31      1.00
library(ggcorrplot)
## Loading required package: ggplot2
# Plot the correlation matrix.
ggcorrplot::ggcorrplot(corr = corMatrix)

library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## The following object is masked from 'package:polycor':
## 
##     polyserial
# Kaiser-Meyer-Olkin sampling adequacy (overall and per item).
psych::KMO(r = corMatrix)
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = corMatrix)
## Overall MSA =  0.46
## MSA for each item = 
##         pared1_Ladrillo           pared2_Piedra            pared3_Adobe 
##                    0.51                    0.31                    0.17 
##            pared4_Tapia          pared5_Quincha           pared6_Piedra 
##                    0.02                    0.10                    0.03 
##           pared7_Madera          pared8_Triplay         techo1_Concreto 
##                    0.46                    0.45                    0.50 
##           techo2_Madera            techo3_Tejas         techo4_Planchas 
##                    0.49                    0.05                    0.49 
##             techo5_Caña          techo6_Triplay             techo7_Paja 
##                    0.19                    0.44                    0.06 
##           piso1_Parquet           piso2_Láminas           piso3_Losetas 
##                    0.72                    0.73                    0.73 
##            piso4_Madera           piso5_Cemento            piso6_Tierra 
##                    0.52                    0.73                    0.63 
##               agua1_Red agua2_Red_fueraVivienda             agua3_Pilón 
##                    0.70                    0.70                    0.67 
##            agua4_Camión              agua5_Pozo         agua6_Manantial 
##                    0.70                    0.36                    0.12 
##               agua7_Río 
##                    0.15
# Bartlett's test on the correlation matrix; prints TRUE only when the
# p-value exceeds 0.05.
psych::cortest.bartlett(R = corMatrix, n = nrow(DF))[["p.value"]] > 0.05
## [1] FALSE
library(matrixcalc)

# Check whether the correlation matrix is singular.
matrixcalc::is.singular.matrix(x = corMatrix)
## [1] TRUE
# Parallel analysis to suggest how many factors to retain (factor solution
# only, plotting disabled). TRUE/FALSE are spelled out because T and F are
# ordinary variables that can be reassigned.
# NOTE(review): the psych documentation describes `correct` as a numeric
# continuity correction for tetrachoric/polychoric correlations, so TRUE is
# presumably read as 1 and may have no effect with the default correlation
# type — confirm this is intended.
fa.parallel(DF, fa = "fa", correct = TRUE, plot = FALSE)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Parallel analysis suggests that the number of factors =  4  and the number of components =  NA
library(GPArotation)
## 
## Attaching package: 'GPArotation'
## The following objects are masked from 'package:psych':
## 
##     equamax, varimin
# Exploratory factor analysis with four factors (the number suggested by the
# parallel analysis), minres extraction on "mixed" correlations and a varimax
# rotation; oblimin is the alternative rotation under consideration.
resfa <- fa(
  r = DF,
  nfactors = 4,
  fm = "minres",
  rotate = "varimax",
  cor = "mixed"
)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
# Show the factor loadings of the fitted solution.
print(resfa[["loadings"]])
## 
## Loadings:
##                         MR1    MR2    MR4    MR3   
## pared1_Ladrillo          0.989         0.101       
## pared2_Piedra            0.632                     
## pared3_Adobe             0.267  0.152  0.315  0.634
## pared4_Tapia                          -0.122  0.384
## pared5_Quincha           0.154         0.702 -0.102
## pared6_Piedra                   0.319         0.208
## pared7_Madera            0.905  0.317        -0.158
## pared8_Triplay           0.821         0.468       
## techo1_Concreto          0.990                     
## techo2_Madera            0.970                     
## techo3_Tejas                                  0.596
## techo4_Planchas          0.877  0.260  0.343       
## techo5_Caña              0.423 -0.180  0.304       
## techo6_Triplay           0.829         0.314       
## techo7_Paja                     0.763        -0.113
## piso1_Parquet            0.984                     
## piso2_Láminas            0.990                     
## piso3_Losetas            0.987                     
## piso4_Madera             0.584  0.427        -0.120
## piso5_Cemento            0.985         0.130       
## piso6_Tierra             0.583  0.353  0.573  0.494
## agua1_Red                0.983         0.127  0.118
## agua2_Red_fueraVivienda  0.968                0.166
## agua3_Pilón              0.889  0.108  0.197  0.117
## agua4_Camión             0.978         0.165       
## agua5_Pozo               0.329  0.587         0.207
## agua6_Manantial         -0.113  0.497         0.393
## agua7_Río                       0.617  0.503 -0.202
## 
##                   MR1   MR2   MR4   MR3
## SS loadings    14.885 2.238 1.871 1.612
## Proportion Var  0.532 0.080 0.067 0.058
## Cumulative Var  0.532 0.612 0.678 0.736

# KMO es muy bajo (MSA global = 0.46) -> NO CUMPLE
# Bartlett (p-valor no mayor a 0.05) -> CUMPLE
# La matriz de correlaciones es singular (TRUE) -> NO CUMPLE
# Por lo tanto, no se puede realizar un EFA.

library(GPArotation)
# Refit the same four-factor minres model on "mixed" correlations, this time
# with an oblimin rotation; the result replaces the previous value of resfa.
resfa <- fa(
  r = DF,
  nfactors = 4,
  fm = "minres",
  rotate = "oblimin",
  cor = "mixed"
)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
# Show the factor loadings of the oblimin-rotated solution.
print(resfa[["loadings"]])
## 
## Loadings:
##                         MR1    MR3    MR2    MR4   
## pared1_Ladrillo          0.993                     
## pared2_Piedra            0.632                     
## pared3_Adobe                    0.734              
## pared4_Tapia                    0.364        -0.267
## pared5_Quincha                                0.705
## pared6_Piedra                   0.265  0.267       
## pared7_Madera            0.956 -0.187  0.305       
## pared8_Triplay           0.762        -0.102  0.412
## techo1_Concreto          0.999                     
## techo2_Madera            1.007                     
## techo3_Tejas                    0.579 -0.123 -0.301
## techo4_Planchas          0.806  0.158  0.138  0.224
## techo5_Caña              0.376        -0.255  0.241
## techo6_Triplay           0.803        -0.177  0.241
## techo7_Paja                            0.774       
## piso1_Parquet            1.012               -0.101
## piso2_Láminas            1.017                     
## piso3_Losetas            0.992                     
## piso4_Madera             0.617 -0.112  0.425       
## piso5_Cemento            0.980                     
## piso6_Tierra             0.371  0.672  0.117  0.311
## agua1_Red                0.975                     
## agua2_Red_fueraVivienda  0.960  0.120              
## agua3_Pilón              0.857  0.113              
## agua4_Camión             0.978                     
## agua5_Pozo               0.250  0.312  0.498       
## agua6_Manantial         -0.224  0.507  0.402 -0.113
## agua7_Río               -0.169         0.552  0.573
## 
##                   MR1   MR3   MR2   MR4
## SS loadings    14.572 2.016 1.837 1.483
## Proportion Var  0.520 0.072 0.066 0.053
## Cumulative Var  0.520 0.592 0.658 0.711

# CLUSTERIZACIÓN: Utilizando el porcentaje de viviendas que tienen electricidad, la razón de votación de Castillo entre Keiko y la tasa de fallecidos por cada 1000 contagiados, Ud. se propone agrupar a las provincias del Perú (sin la provincia de Lima) siguiendo una estrategia aglomerativa (no corrija la correlación negativa si la hubiera); y en ese proceso Ud. encuentra…

# List the column names currently present in `data`.
colnames(data)
##  [1] "V1"                      "key"                    
##  [3] "Código"                  "pared1_Ladrillo"        
##  [5] "pared2_Piedra"           "pared3_Adobe"           
##  [7] "pared4_Tapia"            "pared5_Quincha"         
##  [9] "pared6_Piedra"           "pared7_Madera"          
## [11] "pared8_Triplay"          "pared9_Otro"            
## [13] "pared10_Total"           "techo1_Concreto"        
## [15] "techo2_Madera"           "techo3_Tejas"           
## [17] "techo4_Planchas"         "techo5_Caña"            
## [19] "techo6_Triplay"          "techo7_Paja"            
## [21] "techo8_Otro"             "techo9_Total"           
## [23] "piso1_Parquet"           "piso2_Láminas"          
## [25] "piso3_Losetas"           "piso4_Madera"           
## [27] "piso5_Cemento"           "piso6_Tierra"           
## [29] "piso7_Otro"              "piso8_Total"            
## [31] "agua1_Red"               "agua2_Red_fueraVivienda"
## [33] "agua3_Pilón"             "agua4_Camión"           
## [35] "agua5_Pozo"              "agua6_Manantial"        
## [37] "agua7_Río"               "agua8_Otro"             
## [39] "agua9_Vecino"            "agua10_Total"           
## [41] "elec1_Sí"                "elec2_No"               
## [43] "elec3_Total"             "departamento"           
## [45] "provincia"               "Castillo"               
## [47] "Keiko"                   "ganaCastillo"           
## [49] "countPositivos"          "countFallecidos"
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Keep every row whose province is not "LIMA".
df1 <- filter(data, provincia != "LIMA")

# Preview the first rows of the filtered data.
head(df1, n = 6L)
##   V1                           key Código pared1_Ladrillo pared2_Piedra
## 1  1                AMAZONAS+BAGUA    102            4633            46
## 2  2              AMAZONAS+BONGARA    103            1602             9
## 3  3          AMAZONAS+CHACHAPOYAS    101            3782            22
## 4  4         AMAZONAS+CONDORCANQUI    104             291             7
## 5  5                 AMAZONAS+LUYA    105             430             7
## 6  6 AMAZONAS+RODRIGUEZ DE MENDOZA    106            1546             7
##   pared3_Adobe pared4_Tapia pared5_Quincha pared6_Piedra pared7_Madera
## 1         6639          222           2518           127          4484
## 2         2729          240            157            36          2505
## 3         5881         2476            309           168          1270
## 4          672            8            386             7          8145
## 5         5217         6052            346            54           606
## 6         2778          155            720            28          3646
##   pared8_Triplay pared9_Otro pared10_Total techo1_Concreto techo2_Madera
## 1            851           0         19520            2187           294
## 2             30           0          7308             692            75
## 3             91           0         13999            2262           160
## 4            200           0          9716              56           188
## 5             45           0         12757             187            43
## 6             24           0          8904             480            48
##   techo3_Tejas techo4_Planchas techo5_Caña techo6_Triplay techo7_Paja
## 1          179           13186         160            106        3408
## 2          382            6084          38              5          32
## 3         3393            8005          50             14         115
## 4          177            2036          15             10        7234
## 5         3071            9343          26             12          75
## 6         2810            5495          15              5          51
##   techo8_Otro techo9_Total piso1_Parquet piso2_Láminas piso3_Losetas
## 1           0        19520             6            19           647
## 2           0         7308             5             2           165
## 3           0        13999            23            36          1077
## 4           0         9716             2             0            20
## 5           0        12757             4             0            46
## 6           0         8904             3             4           264
##   piso4_Madera piso5_Cemento piso6_Tierra piso7_Otro piso8_Total agua1_Red
## 1          157          7121        11569          1       19520      9429
## 2          132          2917         4087          0        7308      4569
## 3          240          6189         6434          0       13999     10647
## 4         1523           943         7228          0        9716      1307
## 5          295          1911        10501          0       12757      7172
## 6          176          2974         5483          0        8904      5256
##   agua2_Red_fueraVivienda agua3_Pilón agua4_Camión agua5_Pozo agua6_Manantial
## 1                    4392         793           59       1792             270
## 2                    1497         215            0        474              67
## 3                    1619         184           49        876              92
## 4                     867        1003            2       2564             431
## 5                    3097        1112            0        819             132
## 6                    1278         154            0       1020             211
##   agua7_Río agua8_Otro agua9_Vecino agua10_Total elec1_Sí elec2_No elec3_Total
## 1      2648         56           81        19520    13204     6316       19520
## 2       388         61           37         7308     6025     1283        7308
## 3       488         24           20        13999    12248     1751       13999
## 4      3428         80           34         9716     1792     7924        9716
## 5       369          9           47        12757    10886     1871       12757
## 6       948         29            8         8904     6895     2009        8904
##   departamento            provincia Castillo Keiko ganaCastillo countPositivos
## 1     AMAZONAS                BAGUA    25629 10770            1           8126
## 2     AMAZONAS              BONGARA     8374  5209            1            389
## 3     AMAZONAS          CHACHAPOYAS    15671 10473            1           2174
## 4     AMAZONAS         CONDORCANQUI    13154  1446            1           3481
## 5     AMAZONAS                 LUYA    12606  7840            1            456
## 6     AMAZONAS RODRÍGUEZ DE MENDOZA     7967  5491            1            110
##   countFallecidos
## 1             462
## 2              72
## 3             281
## 4             111
## 5              88
## 6              60
# Side-by-side boxplots of the four raw variables.
# Columns are picked by name: positions 41 and 48:50 stop pointing at these
# variables as soon as a column is added to or removed from the sheet.
vars_explorar <- c(
  "elec1_Sí", "ganaCastillo", "countPositivos", "countFallecidos"
)
boxplot(df1[, vars_explorar], horizontal = FALSE, las = 2, cex.axis = 0.5)

library(BBmisc)
## 
## Attaching package: 'BBmisc'
## The following objects are masked from 'package:dplyr':
## 
##     coalesce, collapse, symdiff
## The following object is masked from 'package:base':
## 
##     isFALSE
# Re-draw the comparison after rescaling, then inspect pairwise correlations.
# Columns are selected by name because positional indices (41, 48:50) break
# silently when the sheet gains or loses a column.
vars_explorar <- c(
  "elec1_Sí", "ganaCastillo", "countPositivos", "countFallecidos"
)
datos_explorar <- df1[, vars_explorar]

# Rescale every variable to the 0-10 range before plotting.
boxplot(normalize(datos_explorar, method = "range", range = c(0, 10)))

# Centre and scale every variable before plotting.
boxplot(normalize(datos_explorar, method = "standardize"))

# Correlation matrix of the unscaled variables.
cor(datos_explorar)
##                   elec1_Sí ganaCastillo countPositivos countFallecidos
## elec1_Sí         1.0000000   -0.3229785      0.9584402       0.9748019
## ganaCastillo    -0.3229785    1.0000000     -0.3304095      -0.3996211
## countPositivos   0.9584402   -0.3304095      1.0000000       0.9602972
## countFallecidos  0.9748019   -0.3996211      0.9602972       1.0000000
# Build the clustering input from the three indicators the exercise asks for:
# percentage of dwellings with electricity, Castillo-to-Keiko vote ratio, and
# deaths per 1000 positive cases. The raw counts and the 0/1 `ganaCastillo`
# flag used before do not correspond to those variables.
# NOTE: output echoed further down in this document was produced with the
# previous inputs and has to be regenerated.
data2 <- data.frame(
  # Share of dwellings with electricity, in percent.
  pct_electricidad = 100 * df1[["elec1_Sí"]] / df1[["elec3_Total"]],
  # Castillo votes per Keiko vote.
  razon_castillo_keiko = df1[["Castillo"]] / df1[["Keiko"]],
  # Deaths per 1000 positive cases.
  fallecidos_x1000 = 1000 * df1[["countFallecidos"]] / df1[["countPositivos"]],
  # Province names identify the rows in later printouts.
  row.names = df1[["provincia"]]
)
library(cluster)
# Pairwise Gower dissimilarities between provinces.
g.dist <- daisy(data2, metric = "gower")
## Warning in daisy(data2, metric = "gower"): binary variable(s) 2 treated as
## interval scaled

# PARA JERÁRQUICO

## PARA JERÁRQUICO
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Suggest a number of clusters with the gap statistic. The statistic is
# estimated from simulated reference data, so the seed is fixed right before
# the call to make the suggested k repeatable.
set.seed(123)
fviz_nbclust(
  data2, hcut,
  diss = g.dist,
  method = "gap_stat",
  k.max = 10,
  verbose = FALSE,
  hc_func = "agnes"
)

set.seed(123)
library(factoextra)

# Agglomerative (agnes) clustering on the precomputed dissimilarities
# `g.dist`, cut into three groups.
res.agnes <- hcut(g.dist, hc_func = "agnes", hc_method = "ward.D", k = 3)

# Store each row's cluster label next to its variables.
data2[["agnes"]] <- res.agnes[["cluster"]]


# Preview the first 15 rows together with their cluster label.
head(data2, n = 15L)
##                           elec1_Sí ganaCastillo countPositivos countFallecidos
## BAGUA                        13204            1           8126             462
## BONGARA                       6025            1            389              72
## CHACHAPOYAS                  12248            1           2174             281
## CONDORCANQUI                  1792            1           3481             111
## LUYA                         10886            1            456              88
## RODRÍGUEZ DE MENDOZA          6895            1            110              60
## UTCUBAMBA                    24395            1           3749             336
## AIJA                          1528            1             79              26
## ANTONIO RAYMONDI              3089            1             54              31
## ASUNCIÓN                      2032            1             59              21
## BOLOGNESI                     5375            1            242              96
## CARHUAZ                      10348            1            552             163
## CARLOS FERMÍN FITZCARRALD     3398            1             56              34
## CASMA                        11637            0            963             362
## CORONGO                       1816            1             37              19
##                           agnes
## BAGUA                         1
## BONGARA                       1
## CHACHAPOYAS                   1
## CONDORCANQUI                  1
## LUYA                          1
## RODRÍGUEZ DE MENDOZA          1
## UTCUBAMBA                     1
## AIJA                          1
## ANTONIO RAYMONDI              1
## ASUNCIÓN                      1
## BOLOGNESI                     1
## CARHUAZ                       1
## CARLOS FERMÍN FITZCARRALD     1
## CASMA                         2
## CORONGO                       1
# Silhouette plot for the agglomerative solution, without the text summary.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
fviz_silhouette(res.agnes, print.summary = FALSE)

#REGRESIÓN LOGÍSTICA

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ✔ readr     2.1.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ psych::%+%()       masks ggplot2::%+%()
## ✖ psych::alpha()     masks ggplot2::alpha()
## ✖ BBmisc::coalesce() masks dplyr::coalesce()
## ✖ BBmisc::collapse() masks dplyr::collapse()
## ✖ tidyr::extract()   masks magrittr::extract()
## ✖ dplyr::filter()    masks stats::filter()
## ✖ dplyr::lag()       masks stats::lag()
## ✖ purrr::set_names() masks magrittr::set_names()
## ✖ BBmisc::symdiff()  masks dplyr::symdiff()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Logistic regression of `ganaCastillo` on `elec1_Sí` and `countFallecidos`.
# NOTE(review): the model is fitted on `data`, not on the Lima-free `df1`
# used for clustering; confirm that is intended.
modelo <- glm(
  formula = ganaCastillo ~ elec1_Sí + countFallecidos,
  family = binomial,
  data = data
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Model summary: coefficients, deviances and AIC.
summary(object = modelo)
## 
## Call:
## glm(formula = ganaCastillo ~ elec1_Sí + countFallecidos, family = binomial, 
##     data = data)
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      8.099e-01  2.793e-01   2.900  0.00373 ** 
## elec1_Sí         2.989e-04  6.606e-05   4.524 6.06e-06 ***
## countFallecidos -9.332e-03  1.968e-03  -4.741 2.12e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 206.24  on 195  degrees of freedom
## Residual deviance: 132.80  on 193  degrees of freedom
## AIC: 138.8
## 
## Number of Fisher Scoring iterations: 8