Data origin and preparation
library(readr)
# Load the dataset from "elsi.csv" (path is relative to the working directory);
# column types are guessed by read_csv().
elsi <- read_csv(file = "elsi.csv")
## Rows: 9412 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): homem, e9
## dbl (15): poor, audio1, lowedu1, depre_auto1, physinact_, smoke1, diab1, has...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Rename the raw columns to descriptive English names, then move the thirteen
# risk-factor columns to the front in a fixed order. Later chunks select the
# contiguous range less_education:vision_loss, so this column order decides
# which variables enter the analysis; all remaining columns follow unchanged.
elsi <- elsi %>%
  rename(
    man = homem,
    race_2 = e9,
    poor_regions = poor,
    hearing_loss = audio1,
    less_education = lowedu1,
    depression = depre_auto1,
    physical_inactivity = physinact_,
    smoking = smoke1,
    diabetes = diab1,
    hypertension = has1_pspd,
    obesity = obe1,
    exc_alcohol = alc_abuse1,
    social_isolation = alone1,
    air_pollution = pollution1,
    vision_loss = vision1,
    high_cholesterol = coldiag1
  ) %>%
  select(
    less_education,
    hearing_loss,
    high_cholesterol,
    depression,
    physical_inactivity,
    smoking,
    diabetes,
    hypertension,
    obesity,
    exc_alcohol,
    social_isolation,
    air_pollution,
    vision_loss,
    everything()
  )
General
# Step 1: pick the risk-factor columns (whole sample) and compute their
# tetrachoric correlation matrix.
basecom <- elsi %>%
  select(less_education:vision_loss)
# Keep only the correlation matrix (element "rho") from the tetrachoric() result.
tetra_corr <- tetrachoric(basecom)[["rho"]]
# Step 2: eigen-decomposition of the tetrachoric correlation matrix.
eigen_res <- eigen(tetra_corr)
eigen_values <- eigen_res$values
# Number of components retained: eigenvalues strictly greater than 1.
n <- sum(eigen_values > 1)
# Percentage of the eigenvalue total accounted for by each component.
total_variance <- sum(eigen_values)
explained_variance <- (eigen_values / total_variance) * 100
explained_variance
## [1] 14.837777 13.286920 9.822131 9.267741 8.518816 7.350668 7.116688
## [8] 6.937842 5.700837 5.111016 4.589047 3.856691 3.603826
# Step 3: communality of each risk factor over the n retained components,
# shown as a percentage table.
# NOTE(review): the sum of squares is taken over the raw eigenvector entries
# returned by eigen(), not over loadings scaled by sqrt(eigenvalue) — confirm
# this is the intended communality definition.
comunalities <- as.data.frame(eigen_res$vectors) # columns are auto-named V1, V2, ...
comunalities$RF <- row.names(tetra_corr) # label each row with its variable name
# seq_len(n) is empty when n == 0, whereas 1:n would yield c(1, 0).
cols <- paste0("V", seq_len(n))
# Subset the eigenvector matrix with drop = FALSE so that a single retained
# component stays two-dimensional and rowSums() still works.
retained_vectors <- eigen_res$vectors[, seq_len(n), drop = FALSE]
comunalities$comunality <- rowSums(retained_vectors^2)
comunalities %>%
  select(RF, comunality) %>%
  gt() %>%
  fmt_percent(
    columns = comunality,
    decimals = 2
  )
| RF | comunality |
|---|---|
| less_education | 44.89% |
| hearing_loss | 26.70% |
| high_cholesterol | 32.50% |
| depression | 43.75% |
| physical_inactivity | 51.48% |
| smoking | 46.49% |
| diabetes | 28.87% |
| hypertension | 45.98% |
| obesity | 25.12% |
| exc_alcohol | 49.61% |
| social_isolation | 21.28% |
| air_pollution | 52.15% |
| vision_loss | 31.17% |
Sex
Males
# Step 1: restrict to rows where man equals "Masculino", pick the risk-factor
# columns and compute their tetrachoric correlation matrix.
basecom <- elsi %>%
  filter(man == "Masculino") %>%
  select(less_education:vision_loss)
# Keep only the correlation matrix (element "rho") from the tetrachoric() result.
tetra_corr <- tetrachoric(basecom)[["rho"]]
# Step 2: eigen-decomposition of the tetrachoric correlation matrix; keep the
# components whose eigenvalue is strictly greater than 1.
eigen_res <- eigen(tetra_corr)
n <- sum(eigen_res$values > 1)
# Step 3: communality of each risk factor over the n retained components,
# shown as a percentage table.
# NOTE(review): the sum of squares is taken over the raw eigenvector entries
# returned by eigen(), not over loadings scaled by sqrt(eigenvalue) — confirm
# this is the intended communality definition.
comunalities <- as.data.frame(eigen_res$vectors) # columns are auto-named V1, V2, ...
comunalities$RF <- row.names(tetra_corr) # label each row with its variable name
# seq_len(n) is empty when n == 0, whereas 1:n would yield c(1, 0).
cols <- paste0("V", seq_len(n))
# Subset the eigenvector matrix with drop = FALSE so that a single retained
# component stays two-dimensional and rowSums() still works.
retained_vectors <- eigen_res$vectors[, seq_len(n), drop = FALSE]
comunalities$comunality <- rowSums(retained_vectors^2)
comunalities %>%
  select(RF, comunality) %>%
  gt() %>%
  fmt_percent(
    columns = comunality,
    decimals = 2
  )
| RF | comunality |
|---|---|
| less_education | 45.36% |
| hearing_loss | 25.66% |
| high_cholesterol | 42.69% |
| depression | 49.97% |
| physical_inactivity | 47.16% |
| smoking | 48.88% |
| diabetes | 26.67% |
| hypertension | 40.62% |
| obesity | 28.10% |
| exc_alcohol | 51.64% |
| social_isolation | 16.91% |
| air_pollution | 46.75% |
| vision_loss | 29.59% |
Females
# Step 1: restrict to rows where man is anything other than "Masculino", pick
# the risk-factor columns and compute their tetrachoric correlation matrix.
basecom <- elsi %>%
  filter(man != "Masculino") %>%
  select(less_education:vision_loss)
# Keep only the correlation matrix (element "rho") from the tetrachoric() result.
tetra_corr <- tetrachoric(basecom)[["rho"]]
## For i = 11 j = 10 A cell entry of 0 was replaced with correct = 0.5. Check your data!
# Step 2: eigen-decomposition of the tetrachoric correlation matrix; keep the
# components whose eigenvalue is strictly greater than 1.
eigen_res <- eigen(tetra_corr)
n <- sum(eigen_res$values > 1)
# Step 3: communality of each risk factor over the n retained components,
# shown as a percentage table.
# NOTE(review): the sum of squares is taken over the raw eigenvector entries
# returned by eigen(), not over loadings scaled by sqrt(eigenvalue) — confirm
# this is the intended communality definition.
comunalities <- as.data.frame(eigen_res$vectors) # columns are auto-named V1, V2, ...
comunalities$RF <- row.names(tetra_corr) # label each row with its variable name
# seq_len(n) is empty when n == 0, whereas 1:n would yield c(1, 0).
cols <- paste0("V", seq_len(n))
# Subset the eigenvector matrix with drop = FALSE so that a single retained
# component stays two-dimensional and rowSums() still works.
retained_vectors <- eigen_res$vectors[, seq_len(n), drop = FALSE]
comunalities$comunality <- rowSums(retained_vectors^2)
comunalities %>%
  select(RF, comunality) %>%
  gt() %>%
  fmt_percent(
    columns = comunality,
    decimals = 2
  )
| RF | comunality |
|---|---|
| less_education | 46.26% |
| hearing_loss | 47.16% |
| high_cholesterol | 31.46% |
| depression | 39.12% |
| physical_inactivity | 58.27% |
| smoking | 43.62% |
| diabetes | 33.94% |
| hypertension | 49.19% |
| obesity | 48.45% |
| exc_alcohol | 45.94% |
| social_isolation | 67.49% |
| air_pollution | 56.07% |
| vision_loss | 33.04% |
Rich vs poor
Poor
# Step 1: restrict to rows where poor_regions equals 1, pick the risk-factor
# columns and compute their tetrachoric correlation matrix.
basecom <- elsi %>%
  filter(poor_regions == 1) %>%
  select(less_education:vision_loss)
# Keep only the correlation matrix (element "rho") from the tetrachoric() result.
tetra_corr <- tetrachoric(basecom)[["rho"]]
## For i = 11 j = 10 A cell entry of 0 was replaced with correct = 0.5. Check your data!
# Step 2: eigen-decomposition of the tetrachoric correlation matrix; keep the
# components whose eigenvalue is strictly greater than 1.
eigen_res <- eigen(tetra_corr)
n <- sum(eigen_res$values > 1)
# Step 3: communality of each risk factor over the n retained components,
# shown as a percentage table.
# NOTE(review): the sum of squares is taken over the raw eigenvector entries
# returned by eigen(), not over loadings scaled by sqrt(eigenvalue) — confirm
# this is the intended communality definition.
comunalities <- as.data.frame(eigen_res$vectors) # columns are auto-named V1, V2, ...
comunalities$RF <- row.names(tetra_corr) # label each row with its variable name
# seq_len(n) is empty when n == 0, whereas 1:n would yield c(1, 0).
cols <- paste0("V", seq_len(n))
# Subset the eigenvector matrix with drop = FALSE so that a single retained
# component stays two-dimensional and rowSums() still works.
retained_vectors <- eigen_res$vectors[, seq_len(n), drop = FALSE]
comunalities$comunality <- rowSums(retained_vectors^2)
comunalities %>%
  select(RF, comunality) %>%
  gt() %>%
  fmt_percent(
    columns = comunality,
    decimals = 2
  )
| RF | comunality |
|---|---|
| less_education | 48.80% |
| hearing_loss | 52.82% |
| high_cholesterol | 36.79% |
| depression | 57.03% |
| physical_inactivity | 49.74% |
| smoking | 54.76% |
| diabetes | 35.24% |
| hypertension | 52.22% |
| obesity | 28.17% |
| exc_alcohol | 47.19% |
| social_isolation | 43.67% |
| air_pollution | 48.94% |
| vision_loss | 44.63% |
Rich
# Step 1: restrict to rows where poor_regions equals 0, pick the risk-factor
# columns and compute their tetrachoric correlation matrix.
basecom <- elsi %>%
  filter(poor_regions == 0) %>%
  select(less_education:vision_loss)
# Keep only the correlation matrix (element "rho") from the tetrachoric() result.
tetra_corr <- tetrachoric(basecom)[["rho"]]
# Step 2: eigen-decomposition of the tetrachoric correlation matrix; keep the
# components whose eigenvalue is strictly greater than 1.
eigen_res <- eigen(tetra_corr)
n <- sum(eigen_res$values > 1)
# Step 3: communality of each risk factor over the n retained components,
# shown as a percentage table.
# NOTE(review): the sum of squares is taken over the raw eigenvector entries
# returned by eigen(), not over loadings scaled by sqrt(eigenvalue) — confirm
# this is the intended communality definition.
comunalities <- as.data.frame(eigen_res$vectors) # columns are auto-named V1, V2, ...
comunalities$RF <- row.names(tetra_corr) # label each row with its variable name
# seq_len(n) is empty when n == 0, whereas 1:n would yield c(1, 0).
cols <- paste0("V", seq_len(n))
# Subset the eigenvector matrix with drop = FALSE so that a single retained
# component stays two-dimensional and rowSums() still works.
retained_vectors <- eigen_res$vectors[, seq_len(n), drop = FALSE]
comunalities$comunality <- rowSums(retained_vectors^2)
comunalities %>%
  select(RF, comunality) %>%
  gt() %>%
  fmt_percent(
    columns = comunality,
    decimals = 2
  )
| RF | comunality |
|---|---|
| less_education | 45.98% |
| hearing_loss | 25.54% |
| high_cholesterol | 35.22% |
| depression | 44.26% |
| physical_inactivity | 47.54% |
| smoking | 47.44% |
| diabetes | 31.12% |
| hypertension | 41.05% |
| obesity | 28.33% |
| exc_alcohol | 50.05% |
| social_isolation | 19.52% |
| air_pollution | 53.96% |
| vision_loss | 30.00% |