library(rio)
library(scraEP)
## Warning: package 'scraEP' was built under R version 4.4.2
library(magrittr)
library(polycor)
library(psych)
## Warning: package 'psych' was built under R version 4.4.2
##
## Adjuntando el paquete: 'psych'
## The following object is masked from 'package:polycor':
##
## polyserial
library(matrixcalc)
library(GPArotation)
##
## Adjuntando el paquete: 'GPArotation'
## The following objects are masked from 'package:psych':
##
## equamax, varimin
library(BBmisc)
##
## Adjuntando el paquete: 'BBmisc'
## The following object is masked from 'package:base':
##
## isFALSE
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.2
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:BBmisc':
##
## coalesce, collapse, symdiff
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
data= import("basefinal.xlsx")
# Verificar las primeras filas de los datos
head(data)
## cod Departamento Provincia Ladrillo o bloque de cemento
## 1 1001 Huánuco Huánuco 30618
## 2 1002 Huánuco Ambo 2576
## 3 1003 Huánuco Dos De Mayo 347
## 4 1004 Huánuco Huacaybamba 18
## 5 1005 Huánuco Huamalíes 831
## 6 1006 Huánuco Leoncio Prado 18118
## Piedra o sillar con cal o cemento Adobe Tapia Quincha (caña con barro)
## 1 151 24950 16165 104
## 2 24 2209 9354 18
## 3 6 1741 7141 6
## 4 0 474 3537 6
## 5 13 1474 10472 53
## 6 124 414 85 68
## Piedra con barro Madera (pona, tornillo etc.) Triplay / calamina / estera
## 1 398 629 175
## 2 84 71 15
## 3 54 183 43
## 4 23 67 5
## 5 149 1643 102
## 6 40 13464 604
## Otro material Tota_paredes Concreto armado Madera Tejas
## 1 0 73190 27576 317 4858
## 2 0 14351 1950 40 1051
## 3 0 9521 203 25 271
## 4 0 4130 14 4 2373
## 5 0 14737 216 94 1842
## 6 0 32917 7963 786 68
## Planchas de calamina, fibra de cemento o similares
## 1 38609
## 2 10716
## 3 7176
## 4 1558
## 5 10987
## 6 23273
## Caña o estera con torta de barro o cemento Triplay / estera / carrizo
## 1 501 127
## 2 52 20
## 3 9 14
## 4 12 9
## 5 35 41
## 6 239 92
## Paja, hoja de palmera y similares Otro material.1 Tota_techos
## 1 1202 0 73190
## 2 522 0 14351
## 3 1823 0 9521
## 4 160 0 4130
## 5 1522 0 14737
## 6 496 0 32917
## Parquet o madera pulida Láminas asfálticas, vinílicos o similares
## 1 749 434
## 2 27 19
## 3 4 5
## 4 0 0
## 5 8 5
## 6 129 125
## Losetas, terrazos, cerámicos o similares Madera (pona, tornillo, etc.)
## 1 10209 807
## 2 360 175
## 3 45 268
## 4 3 4
## 5 32 667
## 6 2953 1695
## Cemento Tierra Otro material.2 Tota_pisos Red pública dentro de la vivienda
## 1 29482 31509 0 73190 41143
## 2 4216 9554 0 14351 6386
## 3 1301 7898 0 9521 4214
## 4 156 3967 0 4130 2321
## 5 1722 12303 0 14737 7579
## 6 18112 9902 1 32917 15284
## Red pública fuera de la vivienda, pero dentro de la edificación
## 1 7788
## 2 2775
## 3 1492
## 4 625
## 5 2305
## 6 3400
## Pilón o pileta de uso público Camión - cisterna u otro similar
## 1 5149 2303
## 2 1853 30
## 3 1104 2
## 4 372 0
## 5 705 6
## 6 1550 129
## Pozo (agua subterránea) Manantial o puquio Río, acequia, lago, laguna Otro
## 1 7753 2389 6151 187
## 2 1607 508 1084 41
## 3 1696 512 464 17
## 4 462 94 236 14
## 5 2301 715 1028 27
## 6 6729 1375 3983 286
## Vecino Total_agua Sí tiene alumbrado eléctrico No tiene alumbrado eléctrico
## 1 327 73190 61038 12152
## 2 67 14351 10364 3987
## 3 20 9521 6157 3364
## 4 6 4130 3164 966
## 5 71 14737 9422 5315
## 6 181 32917 25171 7746
## Tota_electricidad No usa electricidad Sí usa electricidad
## 1 73190 78943 1817
## 2 14351 14714 152
## 3 9521 9888 40
## 4 4130 4408 16
## 5 14737 15424 54
## 6 32917 35024 494
## Tota_energia_cocinar_electricidad No usa gas (balón GLP)
## 1 80760 26595
## 2 14866 7563
## 3 9928 7682
## 4 4424 3946
## 5 15478 12770
## 6 35518 12603
## Sí usa gas (balón GLP) Tota_energia_cocinar_gas_glp No usa carbón
## 1 54165 80760 80028
## 2 7303 14866 14818
## 3 2246 9928 9899
## 4 478 4424 4420
## 5 2708 15478 15430
## 6 22915 35518 35151
## Sí usa carbón Tota_energia_cocinar_carbon No usa leña Sí usa leña
## 1 732 80760 47247 33513
## 2 48 14866 3771 11095
## 3 29 9928 1657 8271
## 4 4 4424 296 4128
## 5 48 15478 1983 13495
## 6 367 35518 18982 16536
## Tota_energia_cocinar_leña
## 1 80760
## 2 14866
## 3 9928
## 4 4424
## 5 15478
## 6 35518
# Limpiar los nombres de las columnas para hacerlos más legibles
colnames(data) <- c("ID", "Código", "Provincia", "No_usa_electricidad", "Si_usa_electricidad", "No_usa_GLP",
"Si_usa_GLP", "No_usa_carbon", "Si_usa_carbon", "No_usa_lena", "Si_usa_lena")
# Verificar los nombres de las columnas después de la limpieza
colnames(data)
## [1] "ID" "Código" "Provincia"
## [4] "No_usa_electricidad" "Si_usa_electricidad" "No_usa_GLP"
## [7] "Si_usa_GLP" "No_usa_carbon" "Si_usa_carbon"
## [10] "No_usa_lena" "Si_usa_lena" NA
## [13] NA NA NA
## [16] NA NA NA
## [19] NA NA NA
## [22] NA NA NA
## [25] NA NA NA
## [28] NA NA NA
## [31] NA NA NA
## [34] NA NA NA
## [37] NA NA NA
## [40] NA NA NA
## [43] NA NA NA
## [46] NA NA NA
## [49] NA NA NA
## [52] NA NA NA
## [55] NA
head(data)
## ID Código Provincia No_usa_electricidad Si_usa_electricidad No_usa_GLP
## 1 1001 Huánuco Huánuco 30618 151 24950
## 2 1002 Huánuco Ambo 2576 24 2209
## 3 1003 Huánuco Dos De Mayo 347 6 1741
## 4 1004 Huánuco Huacaybamba 18 0 474
## 5 1005 Huánuco Huamalíes 831 13 1474
## 6 1006 Huánuco Leoncio Prado 18118 124 414
## Si_usa_GLP No_usa_carbon Si_usa_carbon No_usa_lena Si_usa_lena NA NA NA
## 1 16165 104 398 629 175 0 73190 27576
## 2 9354 18 84 71 15 0 14351 1950
## 3 7141 6 54 183 43 0 9521 203
## 4 3537 6 23 67 5 0 4130 14
## 5 10472 53 149 1643 102 0 14737 216
## 6 85 68 40 13464 604 0 32917 7963
## NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## 1 317 4858 38609 501 127 1202 0 73190 749 434 10209 807 29482 31509 0 73190
## 2 40 1051 10716 52 20 522 0 14351 27 19 360 175 4216 9554 0 14351
## 3 25 271 7176 9 14 1823 0 9521 4 5 45 268 1301 7898 0 9521
## 4 4 2373 1558 12 9 160 0 4130 0 0 3 4 156 3967 0 4130
## 5 94 1842 10987 35 41 1522 0 14737 8 5 32 667 1722 12303 0 14737
## 6 786 68 23273 239 92 496 0 32917 129 125 2953 1695 18112 9902 1 32917
## NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## 1 41143 7788 5149 2303 7753 2389 6151 187 327 73190 61038 12152 73190 78943
## 2 6386 2775 1853 30 1607 508 1084 41 67 14351 10364 3987 14351 14714
## 3 4214 1492 1104 2 1696 512 464 17 20 9521 6157 3364 9521 9888
## 4 2321 625 372 0 462 94 236 14 6 4130 3164 966 4130 4408
## 5 7579 2305 705 6 2301 715 1028 27 71 14737 9422 5315 14737 15424
## 6 15284 3400 1550 129 6729 1375 3983 286 181 32917 25171 7746 32917 35024
## NA NA NA NA NA NA NA NA NA NA NA
## 1 1817 80760 26595 54165 80760 80028 732 80760 47247 33513 80760
## 2 152 14866 7563 7303 14866 14818 48 14866 3771 11095 14866
## 3 40 9928 7682 2246 9928 9899 29 9928 1657 8271 9928
## 4 16 4424 3946 478 4424 4420 4 4424 296 4128 4424
## 5 54 15478 12770 2708 15478 15430 48 15478 1983 13495 15478
## 6 494 35518 12603 22915 35518 35151 367 35518 18982 16536 35518
Calcular los porcentajes para cada columna con respecto al total de
cada fila
porcentajes_data <- data
porcentajes_data[4:11] <- lapply(porcentajes_data[4:11], as.numeric)
porcentajes_data$total <- rowSums(porcentajes_data[4:11], na.rm = TRUE)
porcentajes_data_percent <- porcentajes_data
for (col in 4:11) {
porcentajes_data_percent[, col] <- (porcentajes_data_percent[, col] / porcentajes_data_percent$total) * 100
}
head(porcentajes_data_percent)
## ID Código Provincia No_usa_electricidad Si_usa_electricidad No_usa_GLP
## 1 1001 Huánuco Huánuco 41.8335838 0.20631234 34.089356
## 2 1002 Huánuco Ambo 17.9499686 0.16723573 15.392656
## 3 1003 Huánuco Dos De Mayo 3.6445751 0.06301859 18.285894
## 4 1004 Huánuco Huacaybamba 0.4358354 0.00000000 11.476998
## 5 1005 Huánuco Huamalíes 5.6388682 0.08821334 10.002036
## 6 1006 Huánuco Leoncio Prado 55.0414679 0.37670505 1.257709
## Si_usa_GLP No_usa_carbon Si_usa_carbon No_usa_lena Si_usa_lena NA NA NA
## 1 22.0863506 0.14209591 0.5437901 0.859407 0.2391037 0 73190 27576
## 2 65.1801268 0.12542680 0.5853251 0.494739 0.1045223 0 14351 1950
## 3 75.0026258 0.06301859 0.5671673 1.922067 0.4516332 0 9521 203
## 4 85.6416465 0.14527845 0.5569007 1.622276 0.1210654 0 4130 14
## 5 71.0592387 0.35963900 1.0110606 11.148809 0.6921354 0 14737 216
## 6 0.2582252 0.20658019 0.1215178 40.902877 1.8349181 0 32917 7963
## NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## 1 317 4858 38609 501 127 1202 0 73190 749 434 10209 807 29482 31509 0 73190
## 2 40 1051 10716 52 20 522 0 14351 27 19 360 175 4216 9554 0 14351
## 3 25 271 7176 9 14 1823 0 9521 4 5 45 268 1301 7898 0 9521
## 4 4 2373 1558 12 9 160 0 4130 0 0 3 4 156 3967 0 4130
## 5 94 1842 10987 35 41 1522 0 14737 8 5 32 667 1722 12303 0 14737
## 6 786 68 23273 239 92 496 0 32917 129 125 2953 1695 18112 9902 1 32917
## NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## 1 41143 7788 5149 2303 7753 2389 6151 187 327 73190 61038 12152 73190 78943
## 2 6386 2775 1853 30 1607 508 1084 41 67 14351 10364 3987 14351 14714
## 3 4214 1492 1104 2 1696 512 464 17 20 9521 6157 3364 9521 9888
## 4 2321 625 372 0 462 94 236 14 6 4130 3164 966 4130 4408
## 5 7579 2305 705 6 2301 715 1028 27 71 14737 9422 5315 14737 15424
## 6 15284 3400 1550 129 6729 1375 3983 286 181 32917 25171 7746 32917 35024
## NA NA NA NA NA NA NA NA NA NA NA total
## 1 1817 80760 26595 54165 80760 80028 732 80760 47247 33513 80760 73190
## 2 152 14866 7563 7303 14866 14818 48 14866 3771 11095 14866 14351
## 3 40 9928 7682 2246 9928 9899 29 9928 1657 8271 9928 9521
## 4 16 4424 3946 478 4424 4420 4 4424 296 4128 4424 4130
## 5 54 15478 12770 2708 15478 15430 48 15478 1983 13495 15478 14737
## 6 494 35518 12603 22915 35518 35151 367 35518 18982 16536 35518 32917
library(psych)
data_factorial <- porcentajes_data_percent[, 5:11]
fa_varimax <- fa(data_factorial, nfactors = 1, rotate = "varimax")
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
print(fa_varimax)
## Factor Analysis using method = minres
## Call: fa(r = data_factorial, nfactors = 1, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## Si_usa_electricidad -0.06 0.00373 0.996 1
## No_usa_GLP 1.02 1.04292 -0.043 1
## Si_usa_GLP -0.02 0.00049 1.000 1
## No_usa_carbon -0.28 0.07824 0.922 1
## Si_usa_carbon 0.22 0.04877 0.951 1
## No_usa_lena -0.44 0.19425 0.806 1
## Si_usa_lena -0.29 0.08327 0.917 1
##
## MR1
## SS loadings 1.45
## Proportion Var 0.21
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## df null model = 21 with the objective function = 1.64 with Chi Square = 313.98
## df of the model are 14 and the objective function was 1.05
##
## The root mean square of the residuals (RMSR) is 0.14
## The df corrected root mean square of the residuals is 0.17
##
## The harmonic n.obs is 196 with the empirical chi square 161.27 with prob < 3.9e-27
## The total n.obs was 196 with Likelihood Chi Square = 201.61 with prob < 2.6e-35
##
## Tucker Lewis Index of factoring reliability = 0.036
## RMSEA index = 0.261 and the 90 % confidence intervals are 0.231 0.295
## BIC = 127.72
## Fit based upon off diagonal values = 0.56
fa_oblimin <- fa(data_factorial, nfactors = 1, rotate = "oblimin")
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
print(fa_oblimin)
## Factor Analysis using method = minres
## Call: fa(r = data_factorial, nfactors = 1, rotate = "oblimin")
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## Si_usa_electricidad -0.06 0.00373 0.996 1
## No_usa_GLP 1.02 1.04292 -0.043 1
## Si_usa_GLP -0.02 0.00049 1.000 1
## No_usa_carbon -0.28 0.07824 0.922 1
## Si_usa_carbon 0.22 0.04877 0.951 1
## No_usa_lena -0.44 0.19425 0.806 1
## Si_usa_lena -0.29 0.08327 0.917 1
##
## MR1
## SS loadings 1.45
## Proportion Var 0.21
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## df null model = 21 with the objective function = 1.64 with Chi Square = 313.98
## df of the model are 14 and the objective function was 1.05
##
## The root mean square of the residuals (RMSR) is 0.14
## The df corrected root mean square of the residuals is 0.17
##
## The harmonic n.obs is 196 with the empirical chi square 161.27 with prob < 3.9e-27
## The total n.obs was 196 with Likelihood Chi Square = 201.61 with prob < 2.6e-35
##
## Tucker Lewis Index of factoring reliability = 0.036
## RMSEA index = 0.261 and the 90 % confidence intervals are 0.231 0.295
## BIC = 127.72
## Fit based upon off diagonal values = 0.56
fa_varimax$Vaccounted
## MR1
## SS loadings 1.4516650
## Proportion Var 0.2073807