library(rio)
library(rio)
# Read the workbook "reporte_H.xlsx" with rio::import(). The "New names"
# message that follows shows the unnamed header cells being auto-named
# `...2` to `...15`.
hogar <- import(file = "reporte_H.xlsx")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
# Strip the decoration around the table: the first column and the four rows
# that sit above the row holding the column titles.
hogar <- hogar[-(1:4), -1]
rownames(hogar) <- NULL
# Rows 198 to 200 (positions counted after the trim above) are dropped too.
hogar <- hogar[-(198:200), ]
# Row 1 now holds the column titles: use it as the names, then discard it.
colnames(hogar) <- hogar[1, ]
hogar <- hogar[-1, ]
rownames(hogar) <- NULL
# Inspect the resulting structure.
str(hogar)
## 'data.frame': 196 obs. of 14 variables:
## $ Código : chr "101" "102" "103" "104" ...
## $ Provincia : chr "Amazonas, provincia: Chachapoyas" "Amazonas, provincia: Bagua" "Amazonas, provincia: Bongara" "Amazonas, provincia: Condorcanqui" ...
## $ No usa electricidad : chr "14763" "20313" "7689" "9853" ...
## $ Sí usa electricidad : chr "574" "161" "124" "14" ...
## $ Total : chr "15337" "20474" "7813" "9867" ...
## $ No usa gas (balón GLP): chr "4696" "10557" "3154" "8331" ...
## $ Sí usa gas (balón GLP): chr "10641" "9917" "4659" "1536" ...
## $ Total : chr "15337" "20474" "7813" "9867" ...
## $ No usa carbón : chr "15161" "20185" "7755" "9841" ...
## $ Sí usa carbón : chr "176" "289" "58" "26" ...
## $ Total : chr "15337" "20474" "7813" "9867" ...
## $ No usa leña : chr "7236" "7357" "2345" "1059" ...
## $ Sí usa leña : chr "8101" "13117" "5468" "8808" ...
## $ Total : chr "15337" "20474" "7813" "9867" ...
# List the current column names; "Total" shows up at positions 5, 8, 11, 14.
names(hogar)
## [1] "Código" "Provincia" "No usa electricidad"
## [4] "Sí usa electricidad" "Total" "No usa gas (balón GLP)"
## [7] "Sí usa gas (balón GLP)" "Total" "No usa carbón"
## [10] "Sí usa carbón" "Total" "No usa leña"
## [13] "Sí usa leña" "Total"
# Give each repeated "Total" column a unique name, all in one assignment.
names(hogar)[c(5, 8, 11, 14)] <- c(
  "Total_cocinar_electricidad",
  "Total_cocinar_gas",
  "Total_cocinar_carbon",
  "Total_cocinar_leña"
)
library(stringr)
library(magrittr)
# Split "Provincia" (formatted "<dep>, provincia: <prov>") into two columns.
# The split is computed once and reused; `simplify = TRUE` (spelled out,
# because `T` is an ordinary variable that can be reassigned) returns a
# character matrix with one column per piece.
partes_provincia <- str_split(
  string = hogar$Provincia,
  pattern = ", provincia: ",
  simplify = TRUE
)
hogar$dep <- partes_provincia[, 1]
hogar$prov <- partes_provincia[, 2]
# The helper matrix is not needed afterwards; keep the workspace unchanged.
rm(partes_provincia)
# The combined column is redundant once both pieces exist.
hogar$Provincia <- NULL
# Reorder: column 1 first, then the new dep/prov columns (now at positions
# 14 and 15), then the twelve count columns.
hogar <- hogar[, c(1, 14, 15, 2:13)]
# Inspect the resulting structure.
str(hogar)
## 'data.frame': 196 obs. of 15 variables:
## $ Código : chr "101" "102" "103" "104" ...
## $ dep : chr "Amazonas" "Amazonas" "Amazonas" "Amazonas" ...
## $ prov : chr "Chachapoyas" "Bagua" "Bongara" "Condorcanqui" ...
## $ No usa electricidad : chr "14763" "20313" "7689" "9853" ...
## $ Sí usa electricidad : chr "574" "161" "124" "14" ...
## $ Total_cocinar_electricidad: chr "15337" "20474" "7813" "9867" ...
## $ No usa gas (balón GLP) : chr "4696" "10557" "3154" "8331" ...
## $ Sí usa gas (balón GLP) : chr "10641" "9917" "4659" "1536" ...
## $ Total_cocinar_gas : chr "15337" "20474" "7813" "9867" ...
## $ No usa carbón : chr "15161" "20185" "7755" "9841" ...
## $ Sí usa carbón : chr "176" "289" "58" "26" ...
## $ Total_cocinar_carbon : chr "15337" "20474" "7813" "9867" ...
## $ No usa leña : chr "7236" "7357" "2345" "1059" ...
## $ Sí usa leña : chr "8101" "13117" "5468" "8808" ...
## $ Total_cocinar_leña : chr "15337" "20474" "7813" "9867" ...
# Columns 4 to 15 were read as character; convert each one to numeric.
hogar[, 4:15] <- lapply(hogar[, 4:15], function(columna) as.numeric(columna))
# Inspect the structure again to confirm the new column types.
str(hogar)
## 'data.frame': 196 obs. of 15 variables:
## $ Código : chr "101" "102" "103" "104" ...
## $ dep : chr "Amazonas" "Amazonas" "Amazonas" "Amazonas" ...
## $ prov : chr "Chachapoyas" "Bagua" "Bongara" "Condorcanqui" ...
## $ No usa electricidad : num 14763 20313 7689 9853 13112 ...
## $ Sí usa electricidad : num 574 161 124 14 90 65 255 921 16 33 ...
## $ Total_cocinar_electricidad: num 15337 20474 7813 9867 13202 ...
## $ No usa gas (balón GLP) : num 4696 10557 3154 8331 6863 ...
## $ Sí usa gas (balón GLP) : num 10641 9917 4659 1536 6339 ...
## $ Total_cocinar_gas : num 15337 20474 7813 9867 13202 ...
## $ No usa carbón : num 15161 20185 7755 9841 13169 ...
## $ Sí usa carbón : num 176 289 58 26 33 26 335 218 4 4 ...
## $ Total_cocinar_carbon : num 15337 20474 7813 9867 13202 ...
## $ No usa leña : num 7236 7357 2345 1059 1833 ...
## $ Sí usa leña : num 8101 13117 5468 8808 11369 ...
## $ Total_cocinar_leña : num 15337 20474 7813 9867 13202 ...
# Count the missing cells across the whole table (the run below reports 0).
sum(is.na(hogar))
## [1] 0
# Drop every row that contains at least one NA.
hogar <- stats::na.omit(hogar)
# Percentage of "Sí usa electricidad" over the sum of the yes and no counts.
hogar$porcentaje_usa_electricidad <- with(
  hogar,
  `Sí usa electricidad` / (`Sí usa electricidad` + `No usa electricidad`) * 100
)
# Print the computed percentages.
hogar[["porcentaje_usa_electricidad"]]
## [1] 3.7425833 0.7863632 1.5870984 0.1418871 0.6817149 0.7089878
## [7] 0.8094467 1.9911791 0.7980050 0.8403361 0.5086901 1.1270349
## [13] 0.9042395 0.3579503 2.4279808 0.8083690 0.7539537 2.0192509
## [19] 0.9460360 0.2282733 0.9935040 0.3523194 0.3842987 0.9773188
## [25] 3.6097033 0.6349206 0.8084619 1.4298220 1.0506029 0.7598039
## [31] 0.7890223 1.2919727 0.6450678 0.5026491 3.3655724 2.8160889
## [37] 1.7772705 1.0188015 1.6586792 0.5545653 2.5764895 0.7739586
## [43] 2.1529144 0.8638065 1.0669253 1.4146603 1.0940608 0.9961475
## [49] 0.8124166 1.3501350 0.4689332 1.0850400 0.7941653 1.5731412
## [55] 0.8368201 0.7573856 0.8091160 0.5171655 0.6003548 0.7434213
## [61] 1.5177479 0.3840163 0.7052660 0.5766863 0.3200244 0.5160846
## [67] 4.5483570 3.1401597 1.6491964 0.7582090 0.5876489 0.3551070
## [73] 0.6390627 0.8557457 0.7824726 1.1273799 0.4268293 0.4760341
## [79] 0.9513015 0.7991282 0.9345235 0.5998286 0.6092864 0.6706266
## [85] 0.5718872 0.4406625 0.7826296 2.2498762 1.0224674 0.4029009
## [91] 0.3616637 0.3488823 1.3908441 0.4983836 0.9227468 0.5939706
## [97] 0.2859696 0.3966438 2.6324455 2.9859099 5.1787329 1.3195099
## [103] 2.4048706 2.1676080 0.7731174 1.6591727 0.9564083 0.5003950
## [109] 0.7023386 1.4146128 4.2513168 1.0186302 4.4006537 2.2733005
## [115] 0.8659080 2.1198279 0.3109038 0.8646132 3.1047250 0.9803402
## [121] 0.8024485 0.5639883 0.9297650 1.7844647 3.3769559 1.0542465
## [127] 1.5739322 5.3186771 2.8182942 0.8053691 1.4831574 2.9685655
## [133] 3.3240665 1.9378512 4.2479766 1.2398461 0.7714702 2.6533516
## [139] 1.5374539 0.7066880 0.7393365 0.9094567 0.7732807 0.4780954
## [145] 0.2320186 2.6828597 0.8923791 2.1153846 4.0101382 0.5464481
## [151] 3.6142047 1.2150011 0.3909802 0.9860510 2.6506681 0.5014918
## [157] 0.7130571 1.2904745 2.0731603 2.1179432 2.7646521 1.8332926
## [163] 1.0698767 0.4931562 0.4310515 0.3483562 0.5484811 0.4424779
## [169] 0.5062979 0.7847287 0.4442963 0.9903325 1.7842801 0.3928589
## [175] 0.4525517 2.3288847 1.2492444 0.8931419 1.3615546 1.7261689
## [181] 1.6056255 0.7583371 1.3140142 3.1765986 2.6595186 2.8202225
## [187] 0.9704969 22.0328283 1.0141988 3.5118812 1.8998273 2.0754039
## [193] 2.1548455 0.7065129 1.0734138 0.1453488
# Percentage of "Sí usa gas (balón GLP)" over the sum of the yes and no counts.
# Both columns are referenced by their complete names: the shortened
# `No usa gas` only resolved through `$` partial matching, which returns NULL
# on tibbles and as soon as a second column shares that prefix.
hogar$porcentaje_usa_gas <- (hogar$`Sí usa gas (balón GLP)` /
  (hogar$`Sí usa gas (balón GLP)` + hogar$`No usa gas (balón GLP)`)) * 100
# Print the computed percentages.
hogar$porcentaje_usa_gas
## [1] 69.381235 48.437042 59.631384 15.567042 48.015452 41.241274 51.036409
## [8] 65.522982 23.840399 19.760632 33.361594 33.504939 26.786244 16.183120
## [15] 65.758908 24.251070 21.619468 76.898933 37.301799 9.130931 24.952235
## [22] 11.773341 14.713149 33.892679 89.784209 15.682540 32.271104 70.466881
## [29] 38.404588 36.421569 47.718696 33.125656 22.881786 28.365711 93.061997
## [36] 87.587183 85.904125 73.557470 79.290303 61.616162 85.811415 40.632825
## [43] 69.836200 36.404645 48.108632 52.438276 39.679833 42.839846 57.572451
## [50] 46.954695 35.990621 34.734843 24.764992 58.050261 31.970331 26.357854
## [57] 31.216146 33.901849 31.032883 23.093262 61.581772 36.559429 32.408651
## [64] 22.438930 21.807376 24.737657 86.904082 91.450820 38.532495 45.994030
## [71] 44.535390 20.661620 60.617761 31.629678 55.258216 49.838612 30.841463
## [78] 28.570256 45.035016 55.484926 50.985443 29.280206 28.545416 30.753018
## [85] 21.400622 27.473028 26.757907 67.069094 49.125521 22.622885 10.804702
## [92] 17.495800 64.516583 24.703664 34.535050 45.825395 13.583557 9.733028
## [99] 76.522025 76.064986 74.587082 64.443921 73.452757 85.063681 47.738479
## [106] 60.991457 58.755879 71.135107 34.100739 75.991687 85.458992 66.210964
## [113] 91.275406 85.991298 19.173676 78.087720 39.129469 21.773288 85.145555
## [120] 31.869005 32.640621 18.012812 42.316874 72.030266 90.571666 64.564730
## [127] 63.582328 78.088659 89.686032 27.248322 65.736551 89.122734 88.381768
## [134] 69.665176 86.849069 58.700299 38.675400 64.287854 46.957841 23.575696
## [141] 25.668246 22.060362 31.103982 22.002147 24.767981 78.277096 57.362127
## [148] 68.750000 78.331366 30.928962 93.586888 77.110463 28.296054 51.270643
## [155] 71.803341 27.407478 22.580141 53.380273 77.963391 73.715690 86.805900
## [162] 72.402835 63.813224 38.433474 34.974493 38.193991 52.523780 48.482933
## [169] 45.696468 53.626064 26.113518 54.106736 85.723589 36.958576 53.331477
## [176] 65.433670 49.727987 39.383307 57.210051 49.723632 64.184002 54.609411
## [183] 63.826480 79.293005 63.287081 88.682754 27.484472 55.271465 25.679513
## [190] 89.154210 82.141623 86.944345 78.150273 30.972876 64.251486 16.860465
# Percentage of "Sí usa carbón" over the sum of the yes and no counts.
hogar$porcentaje_usa_carbón <- 100 * (
  hogar[["Sí usa carbón"]] /
    (hogar[["Sí usa carbón"]] + hogar[["No usa carbón"]])
)
# Print the computed percentages.
hogar[["porcentaje_usa_carbón"]]
## [1] 1.14755167 1.41154635 0.74235249 0.26350461 0.24996213 0.28359511
## [7] 1.06339079 0.47131059 0.19950125 0.10185893 0.29673591 1.34965911
## [13] 0.16305959 0.13187641 1.24023886 0.33285782 0.18389114 2.07156309
## [19] 0.21319121 0.09783140 0.53496370 0.92483852 0.17842438 0.33191960
## [25] 1.14916556 0.19047619 0.26948730 0.46396265 0.24913625 0.31862745
## [31] 0.37735849 0.68861490 0.25367834 0.27170221 0.42256529 0.36708794
## [37] 0.45654655 0.55570992 0.46565378 0.23767083 0.30416890 0.40974277
## [43] 0.41652825 0.41270755 0.38797284 0.26069621 0.20839254 0.42927903
## [49] 0.46077361 0.39003900 0.26377491 0.23057100 0.21069692 1.03039188
## [55] 0.16165843 0.44355176 1.32380374 0.34110915 0.93327876 0.78275577
## [61] 1.12418793 0.37327461 0.32240731 0.31102184 0.16763182 0.86874247
## [67] 0.83086678 2.18101818 0.58700210 0.62686567 0.70832677 0.29903747
## [73] 1.31473838 0.47018996 0.27647366 0.39060673 0.68292683 0.51707157
## [79] 1.84592964 0.71437220 0.36841790 0.40274207 0.16107571 0.44069745
## [85] 0.17056286 0.45585777 0.19666078 0.90638930 0.32288443 0.29210314
## [91] 0.09041591 0.31011759 1.03327890 0.33674569 0.20028612 0.65000560
## [97] 0.76854334 0.36613272 0.79577053 0.35718989 1.02813080 0.68331762
## [103] 0.45193771 0.77717972 0.40177756 0.60026978 0.39521005 1.44851198
## [109] 0.73104451 1.10471051 0.52671181 0.71706206 1.08597936 0.60221970
## [115] 0.12370114 2.97574172 0.37752609 3.29218107 1.98376304 0.19076890
## [121] 2.34561868 13.57749617 0.16333710 0.53358992 9.47724799 6.78179458
## [127] 13.65229560 1.03740521 0.78615575 0.85011186 0.65359477 1.06276438
## [133] 1.75003500 1.65692008 1.62645661 19.73065413 0.23289665 18.88606031
## [139] 7.83205212 8.37826024 3.04265403 8.16901408 2.36097400 1.37574398
## [145] 1.39211137 20.26981553 12.63608781 30.57692308 0.50690924 0.21857923
## [151] 0.34881278 0.98643650 2.21858520 0.48100048 14.26810673 3.38665651
## [157] 1.50430023 6.15703983 27.59930324 27.33069639 16.01357525 22.73282816
## [163] 0.38463053 0.30444847 1.07565152 0.84367516 0.59067199 0.51411715
## [169] 0.33959002 0.34827845 0.23325558 0.35369017 0.66530639 0.39285892
## [175] 0.59179837 0.79898204 0.82611324 0.95693780 0.40846639 1.07375861
## [181] 0.95517140 1.40703518 0.71273538 1.99076854 0.55713235 1.91442728
## [187] 0.42701863 1.35732323 0.24340771 4.30473614 15.88946459 2.11131059
## [193] 7.84056067 2.03020952 1.86569548 12.79069767
# Percentage of "Sí usa leña" over the sum of the yes and no counts.
hogar$porcentaje_usa_leña <- with(
  hogar,
  `Sí usa leña` / (`Sí usa leña` + `No usa leña`) * 100
)
# Print the computed percentages.
hogar[["porcentaje_usa_leña"]]
## [1] 52.819978 64.066621 69.985921 89.267254 86.115740 80.104712 65.749294
## [8] 46.484628 89.077307 91.061879 92.581602 75.358286 84.368515 93.330821
## [15] 46.925651 89.491203 88.457766 28.081189 75.622918 96.836785 85.594192
## [22] 92.557252 91.312105 80.638023 12.561735 91.009524 85.865391 50.802451
## [29] 77.086516 77.426471 86.460835 63.955929 89.070088 84.608070 8.918416
## [36] 14.143374 19.582586 49.291470 32.884674 49.237473 15.602076 83.906214
## [43] 48.031074 89.096842 82.508891 69.069161 75.854883 72.652724 59.306414
## [50] 76.987699 84.847597 87.115150 92.317666 55.490398 84.585394 85.212152
## [57] 83.837907 74.537852 83.353800 85.874995 51.936729 79.303400 87.835841
## [64] 89.645565 91.435538 86.900052 2.180837 11.899053 88.245982 82.650746
## [71] 70.628050 19.643024 51.351351 49.539214 11.564945 70.306872 89.841463
## [78] 86.703874 67.728616 64.432740 53.279818 83.907455 73.926746 78.290860
## [85] 88.502057 73.104391 85.992936 41.497028 74.633392 83.309831 93.309222
## [92] 87.188267 46.556675 85.641164 82.253219 64.395383 67.113494 90.511060
## [99] 13.900373 14.802805 19.410729 46.324222 15.571947 26.392917 68.271748
## [106] 48.109263 63.419357 26.046879 73.431699 44.203004 9.170429 61.238440
## [113] 11.517854 19.557952 91.019297 32.041332 91.128137 85.393025 26.165970
## [120] 79.227386 82.873383 76.744186 80.022616 45.678796 10.027766 45.910861
## [127] 42.700073 1.865120 16.378245 84.921700 47.259930 21.083392 22.716454
## [134] 42.558193 18.086012 43.886276 79.941776 30.417610 60.688062 79.345767
## [141] 83.526066 83.331992 78.825271 85.647380 83.932715 9.146808 35.373907
## [148] 20.416667 25.258663 81.951600 2.761084 27.108959 68.103292 56.469456
## [155] 28.357911 86.837428 88.991400 59.907548 10.959765 15.519597 1.527216
## [162] 32.207284 26.019665 13.139090 51.892277 33.774222 19.058760 35.823852
## [169] 21.381823 7.834061 61.890481 13.998271 17.825436 83.315928 65.856715
## [176] 55.493741 66.424877 80.648591 68.449065 76.273106 58.423674 68.168113
## [183] 58.585088 28.527469 52.612215 12.879535 87.305901 39.551768 86.815416
## [190] 11.643309 18.221071 17.163375 31.645353 74.240702 51.415245 74.127907
# Build the analysis data set from 'hogar' (dplyr is loaded for select()).
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Keep only the four percentage columns, in the order listed here.
nuevo_dataset <- select(
  hogar,
  porcentaje_usa_leña,
  porcentaje_usa_carbón,
  porcentaje_usa_gas,
  porcentaje_usa_electricidad
)
library(psych)
## Warning: package 'psych' was built under R version 4.3.3
# Kaiser-Meyer-Olkin factor adequacy for the four percentage columns.
kmo_result <- KMO(r = nuevo_dataset)
# Print the overall MSA and the per-item MSA values.
print(kmo_result)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = nuevo_dataset)
## Overall MSA = 0.63
## MSA for each item =
## porcentaje_usa_leña porcentaje_usa_carbón
## 0.59 0.72
## porcentaje_usa_gas porcentaje_usa_electricidad
## 0.59 0.91
# Principal component analysis on the scaled columns; the result is used
# here only as the input of the scree plot.
pca_result <- prcomp(x = nuevo_dataset, scale. = TRUE)
# Draw the scree plot of the component variances.
screeplot(x = pca_result, main = "Scree Plot")

# One-factor factor analysis requesting the oblimin rotation.
# NOTE(review): the printed loadings are identical to the varimax run, which
# suggests the rotation has no effect with a single factor - confirm.
fa_oblimin <- fa(r = nuevo_dataset, nfactors = 1, rotate = "oblimin")
# Print the loadings and fit statistics.
print(fa_oblimin)
## Factor Analysis using method = minres
## Call: fa(r = nuevo_dataset, nfactors = 1, rotate = "oblimin")
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## porcentaje_usa_leña -0.96 0.913 0.087 1
## porcentaje_usa_carbón 0.28 0.078 0.922 1
## porcentaje_usa_gas 0.91 0.832 0.168 1
## porcentaje_usa_electricidad 0.49 0.241 0.759 1
##
## MR1
## SS loadings 2.06
## Proportion Var 0.52
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## df null model = 6 with the objective function = 1.76 with Chi Square = 338.91
## df of the model are 2 and the objective function was 0.03
##
## The root mean square of the residuals (RMSR) is 0.03
## The df corrected root mean square of the residuals is 0.05
##
## The harmonic n.obs is 196 with the empirical chi square 2.12 with prob < 0.35
## The total n.obs was 196 with Likelihood Chi Square = 6.34 with prob < 0.042
##
## Tucker Lewis Index of factoring reliability = 0.961
## RMSEA index = 0.105 and the 90 % confidence intervals are 0.017 0.203
## BIC = -4.22
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1
## Correlation of (regression) scores with factors 0.97
## Multiple R square of scores with factors 0.94
## Minimum correlation of possible factor scores 0.89
# One-factor factor analysis requesting the varimax rotation.
# NOTE(review): the printed loadings are identical to the oblimin run, which
# suggests the rotation has no effect with a single factor - confirm.
fa_varimax <- fa(r = nuevo_dataset, nfactors = 1, rotate = "varimax")
# Print the loadings and fit statistics.
print(fa_varimax)
## Factor Analysis using method = minres
## Call: fa(r = nuevo_dataset, nfactors = 1, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## porcentaje_usa_leña -0.96 0.913 0.087 1
## porcentaje_usa_carbón 0.28 0.078 0.922 1
## porcentaje_usa_gas 0.91 0.832 0.168 1
## porcentaje_usa_electricidad 0.49 0.241 0.759 1
##
## MR1
## SS loadings 2.06
## Proportion Var 0.52
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## df null model = 6 with the objective function = 1.76 with Chi Square = 338.91
## df of the model are 2 and the objective function was 0.03
##
## The root mean square of the residuals (RMSR) is 0.03
## The df corrected root mean square of the residuals is 0.05
##
## The harmonic n.obs is 196 with the empirical chi square 2.12 with prob < 0.35
## The total n.obs was 196 with Likelihood Chi Square = 6.34 with prob < 0.042
##
## Tucker Lewis Index of factoring reliability = 0.961
## RMSEA index = 0.105 and the 90 % confidence intervals are 0.017 0.203
## BIC = -4.22
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1
## Correlation of (regression) scores with factors 0.97
## Multiple R square of scores with factors 0.94
## Minimum correlation of possible factor scores 0.89
# Variance accounted for by the single factor (SS loadings and proportion),
# printed for the oblimin fit and then for the varimax fit.
fa_oblimin[["Vaccounted"]]
## MR1
## SS loadings 2.0645612
## Proportion Var 0.5161403
fa_varimax[["Vaccounted"]]
## MR1
## SS loadings 2.0645612
## Proportion Var 0.5161403