Data origin and preparation

# Attach every package the script calls: readr (read_csv), dplyr (%>%, rename,
# select, filter, everything), psych (tetrachoric) and gt (gt, fmt_percent).
# Re-attaching a package that is already loaded is harmless.
library(readr)
library(dplyr)
library(psych)
library(gt)

# Read the raw data set from the working directory; column types are guessed
# by readr (see the column specification printed below).
elsi <- read_csv("elsi.csv")
## Rows: 9412 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): homem, e9
## dbl (15): poor, audio1, lowedu1, depre_auto1, physinact_, smoke1, diab1, has...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Give the original columns English names (new_name = old_name).
elsi <- rename(
  elsi,
  man = homem,
  race_2 = e9,
  poor_regions = poor,
  hearing_loss = audio1,
  less_education = lowedu1,
  depression = depre_auto1,
  physical_inactivity = physinact_,
  smoking = smoke1,
  diabetes = diab1,
  hypertension = has1_pspd,
  obesity = obe1,
  exc_alcohol = alc_abuse1,
  social_isolation = alone1,
  air_pollution = pollution1,
  vision_loss = vision1,
  high_cholesterol = coldiag1
)

# Put the 13 risk-factor columns first, in a fixed order, so that the range
# less_education:vision_loss selects exactly these columns; every other
# column follows in its existing order.
elsi <- select(
  elsi,
  less_education,
  hearing_loss,
  high_cholesterol,
  depression,
  physical_inactivity,
  smoking,
  diabetes,
  hypertension,
  obesity,
  exc_alcohol,
  social_isolation,
  air_pollution,
  vision_loss,
  everything()
)

General

# Step 1: keep the risk-factor columns and estimate their tetrachoric
# correlation matrix (only the $rho matrix is retained).
basecom <- elsi %>% select(less_education:vision_loss)
tetra_corr <- tetrachoric(basecom)$rho

# Step 2: eigen-decomposition of the correlation matrix; n is the number of
# components whose eigenvalue is greater than 1.
eigen_res <- eigen(tetra_corr)
n <- sum(eigen_res$values > 1)

# Percentage of the total variance carried by each component.
eigen_values <- eigen_res$values
total_variance <- sum(eigen_values)
explained_variance <- (eigen_values / total_variance) * 100
explained_variance
##  [1] 14.837777 13.286920  9.822131  9.267741  8.518816  7.350668  7.116688
##  [8]  6.937842  5.700837  5.111016  4.589047  3.856691  3.603826
# Step 3: communality of each risk factor, computed as the row sum of the
# squared eigenvector entries over the first n components.
# NOTE(review): the eigenvectors are used as returned by eigen(), i.e. not
# multiplied by sqrt(eigenvalue); confirm this is the intended definition of
# communality for this analysis.
comunalities <- as.data.frame(eigen_res$vectors) # columns are named V1, V2, ...
comunalities$RF <- row.names(tetra_corr)         # risk-factor labels
# seq_len() avoids the 1:0 trap when n is 0; drop = FALSE keeps a one-column
# data frame when n is 1, where rowSums() would otherwise fail on a vector.
cols <- paste0("V", seq_len(n))
comunalities$comunality <- rowSums(comunalities[, cols, drop = FALSE]^2)

# Display the communalities as percentages with two decimals.
comunalities %>%
  select(RF, comunality) %>%
  gt() %>%
  fmt_percent(
    columns = comunality,
    decimals = 2
  )
RF comunality
less_education 44.89%
hearing_loss 26.70%
high_cholesterol 32.50%
depression 43.75%
physical_inactivity 51.48%
smoking 46.49%
diabetes 28.87%
hypertension 45.98%
obesity 25.12%
exc_alcohol 49.61%
social_isolation 21.28%
air_pollution 52.15%
vision_loss 31.17%

Sex

Males

# Step 1: restrict to rows where man == "Masculino", keep the risk-factor
# columns and estimate their tetrachoric correlation matrix ($rho only).
basecom <- elsi %>%
  filter(man == "Masculino") %>%
  select(less_education:vision_loss)
tetra_corr <- tetrachoric(basecom)$rho

# Step 2: eigen-decomposition of the correlation matrix; n is the number of
# components whose eigenvalue is greater than 1.
eigen_res <- eigen(tetra_corr)
n <- sum(eigen_res$values > 1)

# Step 3: communality of each risk factor, computed as the row sum of the
# squared eigenvector entries over the first n components.
# NOTE(review): the eigenvectors are used as returned by eigen(), i.e. not
# multiplied by sqrt(eigenvalue); confirm this is the intended definition of
# communality for this analysis.
comunalities <- as.data.frame(eigen_res$vectors) # columns are named V1, V2, ...
comunalities$RF <- row.names(tetra_corr)         # risk-factor labels
# seq_len() avoids the 1:0 trap when n is 0; drop = FALSE keeps a one-column
# data frame when n is 1, where rowSums() would otherwise fail on a vector.
cols <- paste0("V", seq_len(n))
comunalities$comunality <- rowSums(comunalities[, cols, drop = FALSE]^2)

# Display the communalities as percentages with two decimals.
comunalities %>%
  select(RF, comunality) %>%
  gt() %>%
  fmt_percent(
    columns = comunality,
    decimals = 2
  )
RF comunality
less_education 45.36%
hearing_loss 25.66%
high_cholesterol 42.69%
depression 49.97%
physical_inactivity 47.16%
smoking 48.88%
diabetes 26.67%
hypertension 40.62%
obesity 28.10%
exc_alcohol 51.64%
social_isolation 16.91%
air_pollution 46.75%
vision_loss 29.59%

Females

# Step 1: restrict to rows where man is not "Masculino" (rows with a missing
# value in man are dropped by filter()), keep the risk-factor columns and
# estimate their tetrachoric correlation matrix ($rho only).
basecom <- elsi %>%
  filter(man != "Masculino") %>%
  select(less_education:vision_loss)
tetra_corr <- tetrachoric(basecom)$rho
## For i = 11 j = 10  A cell entry of 0 was replaced with correct =  0.5.  Check your data!
# The message above comes from tetrachoric(): one 2x2 table had an empty cell
# and a 0.5 correction was applied (by column order, variables 11 and 10
# would be social_isolation and exc_alcohol; verify).

# Step 2: eigen-decomposition of the correlation matrix; n is the number of
# components whose eigenvalue is greater than 1.
eigen_res <- eigen(tetra_corr)
n <- sum(eigen_res$values > 1)

# Step 3: communality of each risk factor, computed as the row sum of the
# squared eigenvector entries over the first n components.
# NOTE(review): the eigenvectors are used as returned by eigen(), i.e. not
# multiplied by sqrt(eigenvalue); confirm this is the intended definition of
# communality for this analysis.
comunalities <- as.data.frame(eigen_res$vectors) # columns are named V1, V2, ...
comunalities$RF <- row.names(tetra_corr)         # risk-factor labels
# seq_len() avoids the 1:0 trap when n is 0; drop = FALSE keeps a one-column
# data frame when n is 1, where rowSums() would otherwise fail on a vector.
cols <- paste0("V", seq_len(n))
comunalities$comunality <- rowSums(comunalities[, cols, drop = FALSE]^2)

# Display the communalities as percentages with two decimals.
comunalities %>%
  select(RF, comunality) %>%
  gt() %>%
  fmt_percent(
    columns = comunality,
    decimals = 2
  )
RF comunality
less_education 46.26%
hearing_loss 47.16%
high_cholesterol 31.46%
depression 39.12%
physical_inactivity 58.27%
smoking 43.62%
diabetes 33.94%
hypertension 49.19%
obesity 48.45%
exc_alcohol 45.94%
social_isolation 67.49%
air_pollution 56.07%
vision_loss 33.04%

Rich vs poor

Poor

# Step 1: restrict to rows with poor_regions == 1, keep the risk-factor
# columns and estimate their tetrachoric correlation matrix ($rho only).
basecom <- elsi %>%
  filter(poor_regions == 1) %>%
  select(less_education:vision_loss)
tetra_corr <- tetrachoric(basecom)$rho
## For i = 11 j = 10  A cell entry of 0 was replaced with correct =  0.5.  Check your data!
# The message above comes from tetrachoric(): one 2x2 table had an empty cell
# and a 0.5 correction was applied (by column order, variables 11 and 10
# would be social_isolation and exc_alcohol; verify).

# Step 2: eigen-decomposition of the correlation matrix; n is the number of
# components whose eigenvalue is greater than 1.
eigen_res <- eigen(tetra_corr)
n <- sum(eigen_res$values > 1)

# Step 3: communality of each risk factor, computed as the row sum of the
# squared eigenvector entries over the first n components.
# NOTE(review): the eigenvectors are used as returned by eigen(), i.e. not
# multiplied by sqrt(eigenvalue); confirm this is the intended definition of
# communality for this analysis.
comunalities <- as.data.frame(eigen_res$vectors) # columns are named V1, V2, ...
comunalities$RF <- row.names(tetra_corr)         # risk-factor labels
# seq_len() avoids the 1:0 trap when n is 0; drop = FALSE keeps a one-column
# data frame when n is 1, where rowSums() would otherwise fail on a vector.
cols <- paste0("V", seq_len(n))
comunalities$comunality <- rowSums(comunalities[, cols, drop = FALSE]^2)

# Display the communalities as percentages with two decimals.
comunalities %>%
  select(RF, comunality) %>%
  gt() %>%
  fmt_percent(
    columns = comunality,
    decimals = 2
  )
RF comunality
less_education 48.80%
hearing_loss 52.82%
high_cholesterol 36.79%
depression 57.03%
physical_inactivity 49.74%
smoking 54.76%
diabetes 35.24%
hypertension 52.22%
obesity 28.17%
exc_alcohol 47.19%
social_isolation 43.67%
air_pollution 48.94%
vision_loss 44.63%

Rich

# Step 1: restrict to rows with poor_regions == 0, keep the risk-factor
# columns and estimate their tetrachoric correlation matrix ($rho only).
basecom <- elsi %>%
  filter(poor_regions == 0) %>%
  select(less_education:vision_loss)
tetra_corr <- tetrachoric(basecom)$rho

# Step 2: eigen-decomposition of the correlation matrix; n is the number of
# components whose eigenvalue is greater than 1.
eigen_res <- eigen(tetra_corr)
n <- sum(eigen_res$values > 1)

# Step 3: communality of each risk factor, computed as the row sum of the
# squared eigenvector entries over the first n components.
# NOTE(review): the eigenvectors are used as returned by eigen(), i.e. not
# multiplied by sqrt(eigenvalue); confirm this is the intended definition of
# communality for this analysis.
comunalities <- as.data.frame(eigen_res$vectors) # columns are named V1, V2, ...
comunalities$RF <- row.names(tetra_corr)         # risk-factor labels
# seq_len() avoids the 1:0 trap when n is 0; drop = FALSE keeps a one-column
# data frame when n is 1, where rowSums() would otherwise fail on a vector.
cols <- paste0("V", seq_len(n))
comunalities$comunality <- rowSums(comunalities[, cols, drop = FALSE]^2)

# Display the communalities as percentages with two decimals.
comunalities %>%
  select(RF, comunality) %>%
  gt() %>%
  fmt_percent(
    columns = comunality,
    decimals = 2
  )
RF comunality
less_education 45.98%
hearing_loss 25.54%
high_cholesterol 35.22%
depression 44.26%
physical_inactivity 47.54%
smoking 47.44%
diabetes 31.12%
hypertension 41.05%
obesity 28.33%
exc_alcohol 50.05%
social_isolation 19.52%
air_pollution 53.96%
vision_loss 30.00%