Data

library(rio)

## Warning: package 'rio' was built under R version 4.3.3

data1 = import("reporte.xlsx")

Limpieza

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr)

## Warning: package 'tidyr' was built under R version 4.3.3

# Suponiendo que tu dataframe se llama df y la columna se llama 'columna'

data1 <- data1 %>%
  mutate(Provincia = gsub(",|:|provincia", "", Provincia)) %>% # Elimina la coma, los dos puntos y la palabra 'provincia'
  separate(Provincia, into = c("Departamento", "Provincia"), sep = " ", extra = "merge") # Separa las palabras restantes en dos columnas nuevas

str(data1)

## 'data.frame':    196 obs. of  22 variables:
##  $ Código                : num  101 102 103 104 105 106 107 201 202 203 ...
##  $ Departamento          : chr  "Amazonas" "Amazonas" "Amazonas" "Amazonas" ...
##  $ Provincia             : chr  " Chachapoyas" " Bagua" " Bongara" " Condorcanqui" ...
##  $ Urbano encuesta       : num  9640 10869 3674 945 1877 ...
##  $ Rural encuesta        : num  5697 9605 4139 8922 11325 ...
##  $ Hogar 1               : num  13999 19520 7308 9716 12757 ...
##  $ Hogar 2               : num  1032 666 380 107 381 ...
##  $ Hogar 3               : num  210 165 87 32 51 37 199 952 4 13 ...
##  $ Hogar 4               : num  61 71 24 6 10 13 60 289 1 2 ...
##  $ Hogar 5               : num  27 33 8 2 2 6 22 94 0 1 ...
##  $ Hogar 6               : num  7 13 5 2 1 3 4 22 0 0 ...
##  $ Hogar 7               : num  1 5 1 2 0 1 1 7 0 0 ...
##  $ Hogar 8               : num  0 1 0 0 0 0 0 1 0 0 ...
##  $ Hogar 9               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ No usa electricidad   : num  14763 20313 7689 9853 13112 ...
##  $ Sí usa electricidad   : num  574 161 124 14 90 65 255 921 16 33 ...
##  $ No usa gas (balón GLP): num  4696 10557 3154 8331 6863 ...
##  $ Sí usa gas (balón GLP): num  10641 9917 4659 1536 6339 ...
##  $ No usa carbón         : num  15161 20185 7755 9841 13169 ...
##  $ Sí usa carbón         : num  176 289 58 26 33 26 335 218 4 4 ...
##  $ No usa leña           : num  7236 7357 2345 1059 1833 ...
##  $ Sí usa leña           : num  8101 13117 5468 8808 11369 ...

library(dplyr)
data1$total_elec <- (data1$`No usa electricidad`+ data1$`Sí usa electricidad`)

data1 <- data1 %>%
  mutate(porcentaje_SI_elec = (`Sí usa electricidad`/total_elec) * 100)

data1$total_gas <- (data1$`No usa gas (balón GLP)` + data1$`Sí usa gas (balón GLP)`)

data1 <- data1 %>%
  mutate(porcentaje_SI_gas = (`Sí usa gas (balón GLP)`/total_gas) * 100)

data1$total_carbon <- (data1$`No usa carbón` + data1$`Sí usa carbón`)

data1<- data1 %>%
  mutate(porcentaje_SI_carbon = (`Sí usa carbón`/total_carbon) * 100)

data1$total_lena <- (data1$`No usa leña` + data1$`Sí usa leña`)

data1<- data1 %>%
  mutate(porcentaje_SI_lena = (`Sí usa leña`/total_lena) * 100)

pregunta

UNA vez tenga esas variables, obtenga los porcentajes de los conteos positivos para todos los casos (por ejemplo, porcentaje de hogares que sí usan electricidad). Para las preguntas, use esos sólo esos porcentajes.

theData <- data1 %>%
select("porcentaje_SI_gas", "porcentaje_SI_elec", "porcentaje_SI_carbon", "porcentaje_SI_lena")

corMatrix=polycor::hetcor(theData)$correlations

round(corMatrix,2)

##                      porcentaje_SI_gas porcentaje_SI_elec porcentaje_SI_carbon
## porcentaje_SI_gas                 1.00               0.47                 0.23
## porcentaje_SI_elec                0.47               1.00                 0.10
## porcentaje_SI_carbon              0.23               0.10                 1.00
## porcentaje_SI_lena               -0.87              -0.46                -0.31
##                      porcentaje_SI_lena
## porcentaje_SI_gas                 -0.87
## porcentaje_SI_elec                -0.46
## porcentaje_SI_carbon              -0.31
## porcentaje_SI_lena                 1.00

library(ggcorrplot)

## Warning: package 'ggcorrplot' was built under R version 4.3.3

## Loading required package: ggplot2

## Warning: package 'ggplot2' was built under R version 4.3.3

ggcorrplot(corMatrix)

library(psych)

## Warning: package 'psych' was built under R version 4.3.3

## 
## Attaching package: 'psych'

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

psych::KMO(corMatrix)

## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = corMatrix)
## Overall MSA =  0.63
## MSA for each item = 
##    porcentaje_SI_gas   porcentaje_SI_elec porcentaje_SI_carbon 
##                 0.59                 0.91                 0.72 
##   porcentaje_SI_lena 
##                 0.59

cortest.bartlett(corMatrix,n=nrow(theData))$p.value>0.05

## [1] FALSE

library(matrixcalc)
is.singular.matrix(corMatrix)

## [1] FALSE

fa.parallel(theData, fa = 'fa',correct = T,plot = F)

## Parallel analysis suggests that the number of factors =  1  and the number of components =  NA

#Colocamos el NUMERO que nos dio el codigo anterior
library(GPArotation)

## Warning: package 'GPArotation' was built under R version 4.3.3

## 
## Attaching package: 'GPArotation'

## The following objects are masked from 'package:psych':
## 
##     equamax, varimin

resfa <- fa(theData,
            nfactors = 2,
            cor = 'mixed',
            rotate = "varimax", #oblimin?
            fm="minres")
print(resfa$loadings)

## 
## Loadings:
##                      MR1    MR2   
## porcentaje_SI_gas     0.872  0.321
## porcentaje_SI_elec    0.510       
## porcentaje_SI_carbon  0.111  0.415
## porcentaje_SI_lena   -0.793 -0.542
## 
##                  MR1   MR2
## SS loadings    1.661 0.578
## Proportion Var 0.415 0.145
## Cumulative Var 0.415 0.560

#Colocamos el NUMERO que nos dio el codigo anterior
library(GPArotation)
resfa <- fa(theData,
            nfactors = 2,
            cor = 'mixed',
            rotate = "oblimin", #oblimin?
            fm="minres")
print(resfa$loadings)

## 
## Loadings:
##                      MR1    MR2   
## porcentaje_SI_gas     0.945       
## porcentaje_SI_elec    0.530  0.143
## porcentaje_SI_carbon  0.211 -0.332
## porcentaje_SI_lena   -0.920  0.140
## 
##                  MR1   MR2
## SS loadings    2.066 0.160
## Proportion Var 0.517 0.040
## Cumulative Var 0.517 0.556

print(resfa$loadings,cutoff = 0.5)

## 
## Loadings:
##                      MR1    MR2   
## porcentaje_SI_gas     0.945       
## porcentaje_SI_elec    0.530       
## porcentaje_SI_carbon              
## porcentaje_SI_lena   -0.920       
## 
##                  MR1   MR2
## SS loadings    2.066 0.160
## Proportion Var 0.517 0.040
## Cumulative Var 0.517 0.556

fa.diagram(resfa,main = "Resultados del EFA")

resfa$TLI

## [1] 1.018151

resfa$RMSEA

## NULL

Final

Yhara

2024-06-26

Data

Limpieza

pregunta