library(rio)
# Link to the Google Sheets document that holds the source table.
lkCSV <- "https://docs.google.com/spreadsheets/d/1PVIpzCuw3m5fOS1SfbD0DIfxvMP3KQGy65F9qHl4AE8/edit?gid=805156833#gid=805156833"
# Read the sheet into `data`; every later step starts from this object.
data <- rio::import(lkCSV)
#Se quiere organizar un indicador complejo de pobreza, asumiendo que las variables de calidad de techo, pared, piso y la disponibilidad de agua representan cada una un concepto o variable latente.
Ud. lo hará con el archivo entregado (no considere las mediciones ‘otros’ en ningún caso, ni otras que su criterio de analista le dicte).
Al buscar construir el indicador complejo, Ud. encuentra:
# List the column names of the imported table.
colnames(data)
## [1] "V1" "key"
## [3] "Código" "pared1_Ladrillo"
## [5] "pared2_Piedra" "pared3_Adobe"
## [7] "pared4_Tapia" "pared5_Quincha"
## [9] "pared6_Piedra" "pared7_Madera"
## [11] "pared8_Triplay" "pared9_Otro"
## [13] "pared10_Total" "techo1_Concreto"
## [15] "techo2_Madera" "techo3_Tejas"
## [17] "techo4_Planchas" "techo5_Caña"
## [19] "techo6_Triplay" "techo7_Paja"
## [21] "techo8_Otro" "techo9_Total"
## [23] "piso1_Parquet" "piso2_Láminas"
## [25] "piso3_Losetas" "piso4_Madera"
## [27] "piso5_Cemento" "piso6_Tierra"
## [29] "piso7_Otro" "piso8_Total"
## [31] "agua1_Red" "agua2_Red_fueraVivienda"
## [33] "agua3_Pilón" "agua4_Camión"
## [35] "agua5_Pozo" "agua6_Manantial"
## [37] "agua7_Río" "agua8_Otro"
## [39] "agua9_Vecino" "agua10_Total"
## [41] "elec1_Sí" "elec2_No"
## [43] "elec3_Total" "departamento"
## [45] "provincia" "Castillo"
## [47] "Keiko" "ganaCastillo"
## [49] "countPositivos" "countFallecidos"
# Columns left out of the factor analysis: identifiers, the "Otro" and
# "Total" measurements, agua9_Vecino, electricity, geography, and the
# election / COVID columns.
dontselect <- c(
  "V1", "key", "", "Código", "pared9_Otro", "pared10_Total",
  "techo8_Otro", "techo9_Total", "piso7_Otro", "piso8_Total",
  "agua8_Otro", "agua9_Vecino", "agua10_Total",
  "elec1_Sí", "elec2_No", "elec3_Total",
  "departamento", "provincia", "Castillo", "Keiko",
  "ganaCastillo", "countPositivos", "countFallecidos"
)
# Everything that is not excluded is kept, in the table's own column order.
select <- setdiff(names(data), dontselect)
DF <- data[, select]
# we will use:
library(magrittr)
# Show the first ten rows as a paged table.
rmarkdown::paged_table(head(DF, 10))
library(polycor)
# Correlation matrix of the retained columns, as estimated by hetcor().
corMatrix <- polycor::hetcor(DF)[["correlations"]]
round(corMatrix, 2)
## pared1_Ladrillo pared2_Piedra pared3_Adobe pared4_Tapia
## pared1_Ladrillo 1.00 0.65 0.35 -0.03
## pared2_Piedra 0.65 1.00 0.20 -0.05
## pared3_Adobe 0.35 0.20 1.00 0.00
## pared4_Tapia -0.03 -0.05 0.00 1.00
## pared5_Quincha 0.22 0.14 0.15 -0.11
## pared6_Piedra 0.07 0.10 0.18 0.06
## pared7_Madera 0.89 0.50 0.16 -0.09
## pared8_Triplay 0.85 0.50 0.27 -0.09
## techo1_Concreto 1.00 0.65 0.35 -0.02
## techo2_Madera 0.96 0.55 0.26 -0.04
## techo3_Tejas 0.13 0.04 0.45 0.41
## techo4_Planchas 0.92 0.62 0.46 -0.02
## techo5_Caña 0.44 0.24 0.38 -0.13
## techo6_Triplay 0.84 0.49 0.29 -0.10
## techo7_Paja 0.04 0.02 0.08 -0.12
## piso1_Parquet 0.99 0.59 0.31 -0.02
## piso2_Láminas 0.99 0.69 0.30 -0.03
## piso3_Losetas 1.00 0.62 0.36 -0.03
## piso4_Madera 0.57 0.32 0.12 -0.02
## piso5_Cemento 1.00 0.67 0.38 -0.03
## piso6_Tierra 0.69 0.45 0.72 0.14
## agua1_Red 1.00 0.64 0.39 -0.02
## agua2_Red_fueraVivienda 0.99 0.59 0.40 0.01
## agua3_Pilón 0.91 0.84 0.38 -0.02
## agua4_Camión 0.99 0.61 0.33 -0.06
## agua5_Pozo 0.37 0.25 0.45 0.02
## agua6_Manantial -0.05 -0.03 0.27 0.24
## agua7_Río 0.01 0.00 0.06 -0.06
## pared5_Quincha pared6_Piedra pared7_Madera
## pared1_Ladrillo 0.22 0.07 0.89
## pared2_Piedra 0.14 0.10 0.50
## pared3_Adobe 0.15 0.18 0.16
## pared4_Tapia -0.11 0.06 -0.09
## pared5_Quincha 1.00 -0.03 0.17
## pared6_Piedra -0.03 1.00 0.04
## pared7_Madera 0.17 0.04 1.00
## pared8_Triplay 0.45 0.04 0.74
## techo1_Concreto 0.20 0.07 0.88
## techo2_Madera 0.18 0.05 0.94
## techo3_Tejas -0.05 0.00 0.03
## techo4_Planchas 0.43 0.12 0.87
## techo5_Caña 0.16 -0.05 0.31
## techo6_Triplay 0.25 0.03 0.73
## techo7_Paja -0.06 0.26 0.24
## piso1_Parquet 0.17 0.07 0.89
## piso2_Láminas 0.18 0.07 0.88
## piso3_Losetas 0.22 0.06 0.89
## piso4_Madera 0.04 0.02 0.76
## piso5_Cemento 0.24 0.07 0.89
## piso6_Tierra 0.50 0.23 0.58
## agua1_Red 0.24 0.07 0.89
## agua2_Red_fueraVivienda 0.21 0.10 0.90
## agua3_Pilón 0.28 0.11 0.79
## agua4_Camión 0.27 0.06 0.88
## agua5_Pozo 0.07 0.24 0.45
## agua6_Manantial -0.04 0.39 0.00
## agua7_Río 0.47 0.10 0.19
## pared8_Triplay techo1_Concreto techo2_Madera
## pared1_Ladrillo 0.85 1.00 0.96
## pared2_Piedra 0.50 0.65 0.55
## pared3_Adobe 0.27 0.35 0.26
## pared4_Tapia -0.09 -0.02 -0.04
## pared5_Quincha 0.45 0.20 0.18
## pared6_Piedra 0.04 0.07 0.05
## pared7_Madera 0.74 0.88 0.94
## pared8_Triplay 1.00 0.84 0.79
## techo1_Concreto 0.84 1.00 0.96
## techo2_Madera 0.79 0.96 1.00
## techo3_Tejas 0.04 0.13 0.10
## techo4_Planchas 0.85 0.91 0.88
## techo5_Caña 0.51 0.44 0.39
## techo6_Triplay 0.93 0.83 0.80
## techo7_Paja 0.01 0.04 0.08
## piso1_Parquet 0.81 0.99 0.96
## piso2_Láminas 0.81 0.99 0.96
## piso3_Losetas 0.85 1.00 0.97
## piso4_Madera 0.47 0.56 0.61
## piso5_Cemento 0.85 0.99 0.96
## piso6_Tierra 0.72 0.68 0.61
## agua1_Red 0.85 1.00 0.96
## agua2_Red_fueraVivienda 0.82 0.99 0.96
## agua3_Pilón 0.79 0.91 0.86
## agua4_Camión 0.89 0.99 0.95
## agua5_Pozo 0.29 0.37 0.36
## agua6_Manantial -0.08 -0.05 -0.05
## agua7_Río 0.22 -0.02 0.00
## techo3_Tejas techo4_Planchas techo5_Caña techo6_Triplay
## pared1_Ladrillo 0.13 0.92 0.44 0.84
## pared2_Piedra 0.04 0.62 0.24 0.49
## pared3_Adobe 0.45 0.46 0.38 0.29
## pared4_Tapia 0.41 -0.02 -0.13 -0.10
## pared5_Quincha -0.05 0.43 0.16 0.25
## pared6_Piedra 0.00 0.12 -0.05 0.03
## pared7_Madera 0.03 0.87 0.31 0.73
## pared8_Triplay 0.04 0.85 0.51 0.93
## techo1_Concreto 0.13 0.91 0.44 0.83
## techo2_Madera 0.10 0.88 0.39 0.80
## techo3_Tejas 1.00 0.09 -0.05 0.03
## techo4_Planchas 0.09 1.00 0.37 0.76
## techo5_Caña -0.05 0.37 1.00 0.66
## techo6_Triplay 0.03 0.76 0.66 1.00
## techo7_Paja -0.13 0.13 -0.11 -0.02
## piso1_Parquet 0.13 0.89 0.38 0.80
## piso2_Láminas 0.12 0.89 0.38 0.80
## piso3_Losetas 0.14 0.92 0.44 0.84
## piso4_Madera 0.16 0.63 0.14 0.43
## piso5_Cemento 0.13 0.93 0.47 0.85
## piso6_Tierra 0.32 0.84 0.36 0.62
## agua1_Red 0.16 0.93 0.46 0.84
## agua2_Red_fueraVivienda 0.19 0.92 0.40 0.81
## agua3_Pilón 0.11 0.90 0.38 0.76
## agua4_Camión 0.10 0.91 0.46 0.88
## agua5_Pozo 0.04 0.51 0.15 0.26
## agua6_Manantial 0.19 0.06 -0.13 -0.10
## agua7_Río -0.10 0.27 -0.03 0.04
## techo7_Paja piso1_Parquet piso2_Láminas piso3_Losetas
## pared1_Ladrillo 0.04 0.99 0.99 1.00
## pared2_Piedra 0.02 0.59 0.69 0.62
## pared3_Adobe 0.08 0.31 0.30 0.36
## pared4_Tapia -0.12 -0.02 -0.03 -0.03
## pared5_Quincha -0.06 0.17 0.18 0.22
## pared6_Piedra 0.26 0.07 0.07 0.06
## pared7_Madera 0.24 0.89 0.88 0.89
## pared8_Triplay 0.01 0.81 0.81 0.85
## techo1_Concreto 0.04 0.99 0.99 1.00
## techo2_Madera 0.08 0.96 0.96 0.97
## techo3_Tejas -0.13 0.13 0.12 0.14
## techo4_Planchas 0.13 0.89 0.89 0.92
## techo5_Caña -0.11 0.38 0.38 0.44
## techo6_Triplay -0.02 0.80 0.80 0.84
## techo7_Paja 1.00 0.05 0.05 0.04
## piso1_Parquet 0.05 1.00 0.99 0.99
## piso2_Láminas 0.05 0.99 1.00 0.99
## piso3_Losetas 0.04 0.99 0.99 1.00
## piso4_Madera 0.34 0.57 0.56 0.57
## piso5_Cemento 0.04 0.98 0.98 0.99
## piso6_Tierra 0.20 0.63 0.64 0.68
## agua1_Red 0.04 0.98 0.99 1.00
## agua2_Red_fueraVivienda 0.06 0.99 0.98 0.99
## agua3_Pilón 0.08 0.87 0.92 0.90
## agua4_Camión 0.03 0.98 0.98 0.99
## agua5_Pozo 0.51 0.35 0.35 0.35
## agua6_Manantial 0.33 -0.04 -0.05 -0.06
## agua7_Río 0.43 -0.03 -0.03 0.01
## piso4_Madera piso5_Cemento piso6_Tierra agua1_Red
## pared1_Ladrillo 0.57 1.00 0.69 1.00
## pared2_Piedra 0.32 0.67 0.45 0.64
## pared3_Adobe 0.12 0.38 0.72 0.39
## pared4_Tapia -0.02 -0.03 0.14 -0.02
## pared5_Quincha 0.04 0.24 0.50 0.24
## pared6_Piedra 0.02 0.07 0.23 0.07
## pared7_Madera 0.76 0.89 0.58 0.89
## pared8_Triplay 0.47 0.85 0.72 0.85
## techo1_Concreto 0.56 0.99 0.68 1.00
## techo2_Madera 0.61 0.96 0.61 0.96
## techo3_Tejas 0.16 0.13 0.32 0.16
## techo4_Planchas 0.63 0.93 0.84 0.93
## techo5_Caña 0.14 0.47 0.36 0.46
## techo6_Triplay 0.43 0.85 0.62 0.84
## techo7_Paja 0.34 0.04 0.20 0.04
## piso1_Parquet 0.57 0.98 0.63 0.98
## piso2_Láminas 0.56 0.98 0.64 0.99
## piso3_Losetas 0.57 0.99 0.68 1.00
## piso4_Madera 1.00 0.58 0.38 0.58
## piso5_Cemento 0.58 1.00 0.71 1.00
## piso6_Tierra 0.38 0.71 1.00 0.72
## agua1_Red 0.58 1.00 0.72 1.00
## agua2_Red_fueraVivienda 0.59 0.98 0.72 0.99
## agua3_Pilón 0.51 0.92 0.73 0.91
## agua4_Camión 0.56 0.98 0.69 0.99
## agua5_Pozo 0.43 0.40 0.59 0.38
## agua6_Manantial -0.01 -0.05 0.30 -0.05
## agua7_Río 0.35 0.02 0.37 0.02
## agua2_Red_fueraVivienda agua3_Pilón agua4_Camión
## pared1_Ladrillo 0.99 0.91 0.99
## pared2_Piedra 0.59 0.84 0.61
## pared3_Adobe 0.40 0.38 0.33
## pared4_Tapia 0.01 -0.02 -0.06
## pared5_Quincha 0.21 0.28 0.27
## pared6_Piedra 0.10 0.11 0.06
## pared7_Madera 0.90 0.79 0.88
## pared8_Triplay 0.82 0.79 0.89
## techo1_Concreto 0.99 0.91 0.99
## techo2_Madera 0.96 0.86 0.95
## techo3_Tejas 0.19 0.11 0.10
## techo4_Planchas 0.92 0.90 0.91
## techo5_Caña 0.40 0.38 0.46
## techo6_Triplay 0.81 0.76 0.88
## techo7_Paja 0.06 0.08 0.03
## piso1_Parquet 0.99 0.87 0.98
## piso2_Láminas 0.98 0.92 0.98
## piso3_Losetas 0.99 0.90 0.99
## piso4_Madera 0.59 0.51 0.56
## piso5_Cemento 0.98 0.92 0.98
## piso6_Tierra 0.72 0.73 0.69
## agua1_Red 0.99 0.91 0.99
## agua2_Red_fueraVivienda 1.00 0.88 0.97
## agua3_Pilón 0.88 1.00 0.89
## agua4_Camión 0.97 0.89 1.00
## agua5_Pozo 0.39 0.40 0.34
## agua6_Manantial 0.03 0.01 -0.07
## agua7_Río 0.01 0.10 0.03
## agua5_Pozo agua6_Manantial agua7_Río
## pared1_Ladrillo 0.37 -0.05 0.01
## pared2_Piedra 0.25 -0.03 0.00
## pared3_Adobe 0.45 0.27 0.06
## pared4_Tapia 0.02 0.24 -0.06
## pared5_Quincha 0.07 -0.04 0.47
## pared6_Piedra 0.24 0.39 0.10
## pared7_Madera 0.45 0.00 0.19
## pared8_Triplay 0.29 -0.08 0.22
## techo1_Concreto 0.37 -0.05 -0.02
## techo2_Madera 0.36 -0.05 0.00
## techo3_Tejas 0.04 0.19 -0.10
## techo4_Planchas 0.51 0.06 0.27
## techo5_Caña 0.15 -0.13 -0.03
## techo6_Triplay 0.26 -0.10 0.04
## techo7_Paja 0.51 0.33 0.43
## piso1_Parquet 0.35 -0.04 -0.03
## piso2_Láminas 0.35 -0.05 -0.03
## piso3_Losetas 0.35 -0.06 0.01
## piso4_Madera 0.43 -0.01 0.35
## piso5_Cemento 0.40 -0.05 0.02
## piso6_Tierra 0.59 0.30 0.37
## agua1_Red 0.38 -0.05 0.02
## agua2_Red_fueraVivienda 0.39 0.03 0.01
## agua3_Pilón 0.40 0.01 0.10
## agua4_Camión 0.34 -0.07 0.03
## agua5_Pozo 1.00 0.29 0.27
## agua6_Manantial 0.29 1.00 0.31
## agua7_Río 0.27 0.31 1.00
library(ggcorrplot)
## Loading required package: ggplot2
# Plot the correlation matrix.
ggcorrplot(corr = corMatrix)
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## The following object is masked from 'package:polycor':
##
## polyserial
# Kaiser-Meyer-Olkin sampling adequacy (overall and per item).
psych::KMO(r = corMatrix)
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = corMatrix)
## Overall MSA = 0.46
## MSA for each item =
## pared1_Ladrillo pared2_Piedra pared3_Adobe
## 0.51 0.31 0.17
## pared4_Tapia pared5_Quincha pared6_Piedra
## 0.02 0.10 0.03
## pared7_Madera pared8_Triplay techo1_Concreto
## 0.46 0.45 0.50
## techo2_Madera techo3_Tejas techo4_Planchas
## 0.49 0.05 0.49
## techo5_Caña techo6_Triplay techo7_Paja
## 0.19 0.44 0.06
## piso1_Parquet piso2_Láminas piso3_Losetas
## 0.72 0.73 0.73
## piso4_Madera piso5_Cemento piso6_Tierra
## 0.52 0.73 0.63
## agua1_Red agua2_Red_fueraVivienda agua3_Pilón
## 0.70 0.70 0.67
## agua4_Camión agua5_Pozo agua6_Manantial
## 0.70 0.36 0.12
## agua7_Río
## 0.15
# Bartlett's test on the correlation matrix: prints TRUE when the p-value
# is above 0.05.
psych::cortest.bartlett(corMatrix, n = nrow(DF))$p.value > 0.05
## [1] FALSE
library(matrixcalc)
# Check whether the correlation matrix is singular.
matrixcalc::is.singular.matrix(corMatrix)
## [1] TRUE
# Parallel analysis on the raw columns to suggest a number of factors;
# plotting is switched off.
fa.parallel(DF, fa = "fa", correct = TRUE, plot = FALSE)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Parallel analysis suggests that the number of factors = 4 and the number of components = NA
library(GPArotation)
##
## Attaching package: 'GPArotation'
## The following objects are masked from 'package:psych':
##
## equamax, varimin
# Four-factor exploratory factor analysis (minres) with a varimax rotation.
# Four is the count suggested by the parallel analysis.
resfa <- fa(
  DF,
  nfactors = 4,
  cor = "mixed",
  fm = "minres",
  rotate = "varimax" # oblimin?
)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
# Loadings table of the rotated solution.
print(resfa[["loadings"]])
##
## Loadings:
## MR1 MR2 MR4 MR3
## pared1_Ladrillo 0.989 0.101
## pared2_Piedra 0.632
## pared3_Adobe 0.267 0.152 0.315 0.634
## pared4_Tapia -0.122 0.384
## pared5_Quincha 0.154 0.702 -0.102
## pared6_Piedra 0.319 0.208
## pared7_Madera 0.905 0.317 -0.158
## pared8_Triplay 0.821 0.468
## techo1_Concreto 0.990
## techo2_Madera 0.970
## techo3_Tejas 0.596
## techo4_Planchas 0.877 0.260 0.343
## techo5_Caña 0.423 -0.180 0.304
## techo6_Triplay 0.829 0.314
## techo7_Paja 0.763 -0.113
## piso1_Parquet 0.984
## piso2_Láminas 0.990
## piso3_Losetas 0.987
## piso4_Madera 0.584 0.427 -0.120
## piso5_Cemento 0.985 0.130
## piso6_Tierra 0.583 0.353 0.573 0.494
## agua1_Red 0.983 0.127 0.118
## agua2_Red_fueraVivienda 0.968 0.166
## agua3_Pilón 0.889 0.108 0.197 0.117
## agua4_Camión 0.978 0.165
## agua5_Pozo 0.329 0.587 0.207
## agua6_Manantial -0.113 0.497 0.393
## agua7_Río 0.617 0.503 -0.202
##
## MR1 MR2 MR4 MR3
## SS loadings 14.885 2.238 1.871 1.612
## Proportion Var 0.532 0.080 0.067 0.058
## Cumulative Var 0.532 0.612 0.678 0.736
# KMO es muy bajo (MSA global = 0.46) -> NO CUMPLE. Bartlett (p < 0.05) -> CUMPLE. La matriz de correlación es singular (TRUE) -> NO CUMPLE. Por lo tanto, no se puede realizar un EFA.
library(GPArotation)
# Same four-factor minres model, this time with an oblimin rotation.
# The result overwrites `resfa`.
resfa <- fa(
  DF,
  nfactors = 4,
  cor = "mixed",
  fm = "minres",
  rotate = "oblimin"
)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
# Loadings table of the rotated solution.
print(resfa[["loadings"]])
##
## Loadings:
## MR1 MR3 MR2 MR4
## pared1_Ladrillo 0.993
## pared2_Piedra 0.632
## pared3_Adobe 0.734
## pared4_Tapia 0.364 -0.267
## pared5_Quincha 0.705
## pared6_Piedra 0.265 0.267
## pared7_Madera 0.956 -0.187 0.305
## pared8_Triplay 0.762 -0.102 0.412
## techo1_Concreto 0.999
## techo2_Madera 1.007
## techo3_Tejas 0.579 -0.123 -0.301
## techo4_Planchas 0.806 0.158 0.138 0.224
## techo5_Caña 0.376 -0.255 0.241
## techo6_Triplay 0.803 -0.177 0.241
## techo7_Paja 0.774
## piso1_Parquet 1.012 -0.101
## piso2_Láminas 1.017
## piso3_Losetas 0.992
## piso4_Madera 0.617 -0.112 0.425
## piso5_Cemento 0.980
## piso6_Tierra 0.371 0.672 0.117 0.311
## agua1_Red 0.975
## agua2_Red_fueraVivienda 0.960 0.120
## agua3_Pilón 0.857 0.113
## agua4_Camión 0.978
## agua5_Pozo 0.250 0.312 0.498
## agua6_Manantial -0.224 0.507 0.402 -0.113
## agua7_Río -0.169 0.552 0.573
##
## MR1 MR3 MR2 MR4
## SS loadings 14.572 2.016 1.837 1.483
## Proportion Var 0.520 0.072 0.066 0.053
## Cumulative Var 0.520 0.592 0.658 0.711
#CLUSTERIZACIÓN: Utilizando el porcentaje de viviendas que tienen electricidad, la razón de votación de Castillo entre Keiko y la tasa de fallecidos por cada 1000 contagiados, Ud. se propone agrupar a las provincias del Perú (sin la provincia de Lima) siguiendo una estrategia aglomerativa (no corrija la correlación negativa si la hubiera); y en ese proceso Ud. encuentra…
# List the column names again before building the clustering inputs.
colnames(data)
## [1] "V1" "key"
## [3] "Código" "pared1_Ladrillo"
## [5] "pared2_Piedra" "pared3_Adobe"
## [7] "pared4_Tapia" "pared5_Quincha"
## [9] "pared6_Piedra" "pared7_Madera"
## [11] "pared8_Triplay" "pared9_Otro"
## [13] "pared10_Total" "techo1_Concreto"
## [15] "techo2_Madera" "techo3_Tejas"
## [17] "techo4_Planchas" "techo5_Caña"
## [19] "techo6_Triplay" "techo7_Paja"
## [21] "techo8_Otro" "techo9_Total"
## [23] "piso1_Parquet" "piso2_Láminas"
## [25] "piso3_Losetas" "piso4_Madera"
## [27] "piso5_Cemento" "piso6_Tierra"
## [29] "piso7_Otro" "piso8_Total"
## [31] "agua1_Red" "agua2_Red_fueraVivienda"
## [33] "agua3_Pilón" "agua4_Camión"
## [35] "agua5_Pozo" "agua6_Manantial"
## [37] "agua7_Río" "agua8_Otro"
## [39] "agua9_Vecino" "agua10_Total"
## [41] "elec1_Sí" "elec2_No"
## [43] "elec3_Total" "departamento"
## [45] "provincia" "Castillo"
## [47] "Keiko" "ganaCastillo"
## [49] "countPositivos" "countFallecidos"
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Keep every row whose province is not "LIMA".
df1 <- dplyr::filter(data, provincia != "LIMA")
# Inspect the first rows of the filtered table.
head(df1)
## V1 key Código pared1_Ladrillo pared2_Piedra
## 1 1 AMAZONAS+BAGUA 102 4633 46
## 2 2 AMAZONAS+BONGARA 103 1602 9
## 3 3 AMAZONAS+CHACHAPOYAS 101 3782 22
## 4 4 AMAZONAS+CONDORCANQUI 104 291 7
## 5 5 AMAZONAS+LUYA 105 430 7
## 6 6 AMAZONAS+RODRIGUEZ DE MENDOZA 106 1546 7
## pared3_Adobe pared4_Tapia pared5_Quincha pared6_Piedra pared7_Madera
## 1 6639 222 2518 127 4484
## 2 2729 240 157 36 2505
## 3 5881 2476 309 168 1270
## 4 672 8 386 7 8145
## 5 5217 6052 346 54 606
## 6 2778 155 720 28 3646
## pared8_Triplay pared9_Otro pared10_Total techo1_Concreto techo2_Madera
## 1 851 0 19520 2187 294
## 2 30 0 7308 692 75
## 3 91 0 13999 2262 160
## 4 200 0 9716 56 188
## 5 45 0 12757 187 43
## 6 24 0 8904 480 48
## techo3_Tejas techo4_Planchas techo5_Caña techo6_Triplay techo7_Paja
## 1 179 13186 160 106 3408
## 2 382 6084 38 5 32
## 3 3393 8005 50 14 115
## 4 177 2036 15 10 7234
## 5 3071 9343 26 12 75
## 6 2810 5495 15 5 51
## techo8_Otro techo9_Total piso1_Parquet piso2_Láminas piso3_Losetas
## 1 0 19520 6 19 647
## 2 0 7308 5 2 165
## 3 0 13999 23 36 1077
## 4 0 9716 2 0 20
## 5 0 12757 4 0 46
## 6 0 8904 3 4 264
## piso4_Madera piso5_Cemento piso6_Tierra piso7_Otro piso8_Total agua1_Red
## 1 157 7121 11569 1 19520 9429
## 2 132 2917 4087 0 7308 4569
## 3 240 6189 6434 0 13999 10647
## 4 1523 943 7228 0 9716 1307
## 5 295 1911 10501 0 12757 7172
## 6 176 2974 5483 0 8904 5256
## agua2_Red_fueraVivienda agua3_Pilón agua4_Camión agua5_Pozo agua6_Manantial
## 1 4392 793 59 1792 270
## 2 1497 215 0 474 67
## 3 1619 184 49 876 92
## 4 867 1003 2 2564 431
## 5 3097 1112 0 819 132
## 6 1278 154 0 1020 211
## agua7_Río agua8_Otro agua9_Vecino agua10_Total elec1_Sí elec2_No elec3_Total
## 1 2648 56 81 19520 13204 6316 19520
## 2 388 61 37 7308 6025 1283 7308
## 3 488 24 20 13999 12248 1751 13999
## 4 3428 80 34 9716 1792 7924 9716
## 5 369 9 47 12757 10886 1871 12757
## 6 948 29 8 8904 6895 2009 8904
## departamento provincia Castillo Keiko ganaCastillo countPositivos
## 1 AMAZONAS BAGUA 25629 10770 1 8126
## 2 AMAZONAS BONGARA 8374 5209 1 389
## 3 AMAZONAS CHACHAPOYAS 15671 10473 1 2174
## 4 AMAZONAS CONDORCANQUI 13154 1446 1 3481
## 5 AMAZONAS LUYA 12606 7840 1 456
## 6 AMAZONAS RODRÍGUEZ DE MENDOZA 7967 5491 1 110
## countFallecidos
## 1 462
## 2 72
## 3 281
## 4 111
## 5 88
## 6 60
# Boxplots of the four clustering inputs on their raw scale.
# Columns are selected by name instead of by position (41, 48:50), so a
# change in the sheet's column order fails loudly rather than silently
# plotting different variables.
boxplot(
  df1[, c("elec1_Sí", "ganaCastillo", "countPositivos", "countFallecidos")],
  horizontal = FALSE,
  las = 2,
  cex.axis = 0.5
)
library(BBmisc)
##
## Attaching package: 'BBmisc'
## The following objects are masked from 'package:dplyr':
##
## coalesce, collapse, symdiff
## The following object is masked from 'package:base':
##
## isFALSE
# Clustering inputs, selected by name instead of by position (41, 48:50),
# so a change in the sheet's column order cannot silently swap variables.
clusterCols <- c(
  "elec1_Sí", "ganaCastillo", "countPositivos", "countFallecidos"
)
# Boxplots after rescaling every column to the 0-10 range.
boxplot(normalize(df1[, clusterCols], method = "range", range = c(0, 10)))
# Boxplots after standardizing every column.
boxplot(normalize(df1[, clusterCols], method = "standardize"))
# Pairwise correlations between the inputs.
cor(df1[, clusterCols])
## elec1_Sí ganaCastillo countPositivos countFallecidos
## elec1_Sí 1.0000000 -0.3229785 0.9584402 0.9748019
## ganaCastillo -0.3229785 1.0000000 -0.3304095 -0.3996211
## countPositivos 0.9584402 -0.3304095 1.0000000 0.9602972
## countFallecidos 0.9748019 -0.3996211 0.9602972 1.0000000
# Table fed to the clustering. Columns are selected by name instead of by
# position (41, 48:50), so a change in the sheet's column order cannot
# silently swap variables.
# NOTE(review): the exercise statement asks for the share of dwellings with
# electricity, the Castillo/Keiko vote ratio and deaths per 1000 infected.
# These columns are raw counts plus the binary ganaCastillo flag. Confirm
# whether derived rates were intended before relying on the clusters.
data2 <- df1[
  ,
  c("elec1_Sí", "ganaCastillo", "countPositivos", "countFallecidos")
]
# Use the province name as the row label.
rownames(data2) <- df1$provincia
library(cluster)
# Gower dissimilarity between provinces.
g.dist <- daisy(data2, metric = "gower")
## Warning in daisy(data2, metric = "gower"): binary variable(s) 2 treated as
## interval scaled
#PARA JERARQUICO
## PARA JERARQUICO
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Seed the RNG before the gap statistic: it relies on bootstrap reference
# samples, so this is the step that needs the seed to be reproducible.
# The seed was previously set only after this call.
set.seed(123)
# Gap-statistic plot for 1..10 clusters, using agnes-based cuts.
# NOTE(review): `diss` does not appear to be used by the "gap_stat" method,
# so this plot is presumably based on `data2` itself rather than on the
# Gower distances. Verify against the fviz_nbclust documentation.
fviz_nbclust(
  data2,
  hcut,
  diss = g.dist,
  method = "gap_stat",
  k.max = 10,
  verbose = FALSE,
  hc_func = "agnes"
)
library(factoextra)
# Agglomerative clustering on the Gower distances, cut into three groups.
res.agnes <- hcut(g.dist, k = 3, hc_func = "agnes", hc_method = "ward.D")
# Store each province's cluster label next to its inputs.
data2$agnes <- res.agnes$cluster
head(data2, 15)
## elec1_Sí ganaCastillo countPositivos countFallecidos
## BAGUA 13204 1 8126 462
## BONGARA 6025 1 389 72
## CHACHAPOYAS 12248 1 2174 281
## CONDORCANQUI 1792 1 3481 111
## LUYA 10886 1 456 88
## RODRÍGUEZ DE MENDOZA 6895 1 110 60
## UTCUBAMBA 24395 1 3749 336
## AIJA 1528 1 79 26
## ANTONIO RAYMONDI 3089 1 54 31
## ASUNCIÓN 2032 1 59 21
## BOLOGNESI 5375 1 242 96
## CARHUAZ 10348 1 552 163
## CARLOS FERMÍN FITZCARRALD 3398 1 56 34
## CASMA 11637 0 963 362
## CORONGO 1816 1 37 19
## agnes
## BAGUA 1
## BONGARA 1
## CHACHAPOYAS 1
## CONDORCANQUI 1
## LUYA 1
## RODRÍGUEZ DE MENDOZA 1
## UTCUBAMBA 1
## AIJA 1
## ANTONIO RAYMONDI 1
## ASUNCIÓN 1
## BOLOGNESI 1
## CARHUAZ 1
## CARLOS FERMÍN FITZCARRALD 1
## CASMA 2
## CORONGO 1
# Silhouette plot of the agnes solution; the text summary is suppressed.
fviz_silhouette(res.agnes, print.summary = FALSE)
#REGRESIÓN LOGÍSTICA
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ✔ readr 2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ psych::%+%() masks ggplot2::%+%()
## ✖ psych::alpha() masks ggplot2::alpha()
## ✖ BBmisc::coalesce() masks dplyr::coalesce()
## ✖ BBmisc::collapse() masks dplyr::collapse()
## ✖ tidyr::extract() masks magrittr::extract()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::set_names() masks magrittr::set_names()
## ✖ BBmisc::symdiff() masks dplyr::symdiff()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Logistic regression of ganaCastillo on the count of dwellings with
# electricity and the count of deaths.
# NOTE(review): the model is fitted on `data` (all rows), not on the
# Lima-free `df1`. Confirm that this is intended.
modelo <- glm(
  formula = ganaCastillo ~ elec1_Sí + countFallecidos,
  family = binomial,
  data = data
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Coefficient table and fit statistics.
summary(modelo)
##
## Call:
## glm(formula = ganaCastillo ~ elec1_Sí + countFallecidos, family = binomial,
## data = data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 8.099e-01 2.793e-01 2.900 0.00373 **
## elec1_Sí 2.989e-04 6.606e-05 4.524 6.06e-06 ***
## countFallecidos -9.332e-03 1.968e-03 -4.741 2.12e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 206.24 on 195 degrees of freedom
## Residual deviance: 132.80 on 193 degrees of freedom
## AIC: 138.8
##
## Number of Fisher Scoring iterations: 8