par=c("edad_imc","edad_hdl", "edad_ldl","edad_sexo",   "edad_hta","edad_dm", "edad_fuma","edad_soc",
                 "imc_hdl",   "imc_ldl", "imc_sexo",   "imc_hta",  "imc_dm",  "imc_fuma","imc_soc",
                              "hdl_ldl",   "hdl_sexo", "hdl_hta",  "hdl_dm",  "hdl_fuma","hdl_soc",
                                           "ldl_sexo", "ldl_hta",  "ldl_dm",  "ldl_fuma","ldl_soc",
                                                      "sexo_hta", "sexo_dm", "sexo_fuma","sexo_soc",
                                                                   "hta_dm",  "hta_fuma","hta_soc",
                                                                               "dm_fuma","dm_soc",
                                                                                         "fuma_soc")



vectorCorrelaciones=c(
  0.40, -0.40,   0.40,      0,      0.44,   0.35,   0.25,    -0.25,
           -0.45,    0.45,   0.15,    0.55, 0.40,   -0.05,  -0.40,
                    -0.55,   0.05,   -0.55, -0.45, -0.1,     0.40,
                               -0.05,   0.55,   0.45,   0.1,   -0.4,
                                           0.1, 0.1,    0.3,    0,
                                                  0.1,  0.20,  -0.4,
                                                          0.20, -0.4,
                                                                  -0.20
) %>% set_names(par)







ordenLLenado=c("edad_imc",
               "edad_hdl", "imc_hdl",
               "edad_ldl", "imc_ldl", "hdl_ldl",
               "edad_sexo","imc_sexo","hdl_sexo","ldl_sexo",
               "edad_hta", "imc_hta", "hdl_hta", "ldl_hta", "sexo_hta",
               "edad_dm",  "imc_dm",  "hdl_dm", "ldl_dm",   "sexo_dm",  "hta_dm",
               "edad_fuma","imc_fuma","hdl_fuma","ldl_fuma","sexo_fuma","hta_fuma","dm_fuma",
               "edad_soc", "imc_soc", "hdl_soc", "ldl_soc", "sexo_soc", "hta_soc", "dm_soc", "fuma_soc")


matrizCorrelaciones<-vector_a_matriz_simetrica(vectorCorrelaciones[ordenLLenado],9)


eigen(matrizCorrelaciones)$values %>% sort()
## [1] 0.1949300 0.3516115 0.4500000 0.5834265 0.7304160 0.8704806 0.9335323
## [8] 1.3069318 3.5786712
datosStd <- mvrnorm(n = 20000, mu=rep(0,9),Sigma = matrizCorrelaciones) %>% as_tibble() %>% 
  set_names(c("edad","imc","hdl","ldl","sexo","hta","dm","fuma","soc"))
parametros=list(
  edad=list(
    media=50,
    desviacion=18,
    p=0.5
  ),
  imc=list(
    media=26,
    desviacion=3.5,
    p=0.5
  ),
  hdl=list(
    media=44,
    desviacion=10,
    p=0.5
  ),
  ldl=list(
    media=110,
    desviacion=30,
    p=0.5
  ),
  sexo=list(
    p=0.5
  ),
  hta=list(
    p=0.21
  ),
  dm=list(
    p=0.08
  ),
  fuma=list(
    p=0.22
  ),
  soc=list(
    p=c(0.42,0.30)
  )
)


#escala(datosStd$edad,parametros$edad)

#escala(datosStd$sexo,parametros$sexo)


#escala(datosStd$soc,parametros$soc)


datos=datosStd %>% 
  mutate(
    edad=escala(edad,parametros$edad) %>% round(2),
    imc=escala(imc,parametros$imc) %>% round(3),
    hdl=escala(hdl,parametros$hdl) %>% round(1),
    ldl=escala(ldl,parametros$ldl) %>% round(1),
    sexo=escala(sexo,parametros$sexo) %>% factor(levels=c(0,1),labels=c("Mujer","Hombre")),
    hta=escala(hta,parametros$hta)%>% factor(levels=c(0,1),labels=c("No","Sí")),
    dm=escala(dm,parametros$dm)%>% factor(levels=c(0,1),labels=c("No","Sí")),
    fuma=escala(fuma,parametros$fuma)%>% factor(levels=c(0,1),labels=c("No","Sí")),
    soc=escala(soc,parametros$soc)%>% factor(levels=c(0,1,2),labels=c("Baja","Media","Alta"))
  )%>% filter(edad>=18 & edad<=80,
              hdl>=24 & hdl<=70,
              ldl>=38 & ldl<=220,
              imc>=16 & imc<=40)



datos %>% head()
## # A tibble: 6 × 9
##    edad   imc   hdl   ldl sexo   hta   dm    fuma  soc  
##   <dbl> <dbl> <dbl> <dbl> <fct>  <fct> <fct> <fct> <fct>
## 1  70.0  27.7  40   146.  Hombre No    No    No    Baja 
## 2  55.2  27.1  43   124.  Hombre No    No    No    Media
## 3  35.9  28.1  47.1  75.9 Mujer  No    No    No    Media
## 4  55.5  21.5  33.8 147.  Mujer  No    No    No    Media
## 5  64.0  29.2  43.6 112.  Hombre No    Sí    No    Baja 
## 6  35.5  28.5  46.3 114.  Mujer  No    No    No    Media
datos %>%   tbl_summary(
#    include=c(edad,imc,hdl,ldl),
    by = sexo, 
    missing = "no",
      statistic = list(all_continuous() ~ "{mean} ({sd})",
                   all_categorical() ~ "{n} ({p}%)")
  ) |> 
  add_n() |>
  add_p(test = list(all_continuous() ~ "t.test",
                   all_categorical() ~ "chisq.test")) |>
  modify_header(label = "**Variable**") |> 
  bold_labels() 
Variable N Mujer
N = 8,832
1
Hombre
N = 8,918
1
p-value2
edad 17,750 50 (15) 50 (15) >0.9
imc 17,750 25.6 (3.3) 26.3 (3.3) <0.001
hdl 17,750 44 (9) 45 (9) <0.001
ldl 17,750 111 (28) 108 (28) <0.001
hta 17,750

<0.001
    No
7,320 (83%) 6,982 (78%)
    Sí
1,512 (17%) 1,936 (22%)
dm 17,750

<0.001
    No
8,303 (94%) 8,197 (92%)
    Sí
529 (6.0%) 721 (8.1%)
fuma 17,750

<0.001
    No
7,565 (86%) 6,327 (71%)
    Sí
1,267 (14%) 2,591 (29%)
soc 17,750

0.7
    Baja
2,425 (27%) 2,402 (27%)
    Media
3,759 (43%) 3,829 (43%)
    Alta
2,648 (30%) 2,687 (30%)
1 Mean (SD); n (%)
2 Welch Two Sample t-test; Pearson’s Chi-squared test

hdl

datos %>% ggplot(aes(x=hdl))+geom_histogram(bins=20)+
  theme_minimal()

datos %>% finalfit(dependent = "hdl", explanatory=c("edad","imc", "sexo", "hta", "fuma","soc"),metrics = TRUE)%>% .[[1]] %>% knitr::kable()
Dependent: hdl unit value Coefficient (univariable) Coefficient (multivariable)
1 edad [18.0,80.0] Mean (sd) 44.4 (9.0) -0.19 (-0.20 to -0.18, p<0.001) -0.10 (-0.11 to -0.09, p<0.001)
6 imc [16.1,38.2] Mean (sd) 44.4 (9.0) -1.06 (-1.09 to -1.02, p<0.001) -0.61 (-0.65 to -0.57, p<0.001)
8 sexo Mujer Mean (sd) 44.0 (9.0) - -
7 Hombre Mean (sd) 44.8 (9.0) 0.75 (0.48 to 1.01, p<0.001) 1.42 (1.18 to 1.65, p<0.001)
4 hta No Mean (sd) 45.9 (8.7) - -
5 Mean (sd) 38.3 (7.7) -7.56 (-7.87 to -7.24, p<0.001) -4.06 (-4.38 to -3.75, p<0.001)
2 fuma No Mean (sd) 44.5 (9.0) - -
3 Mean (sd) 43.9 (9.1) -0.61 (-0.94 to -0.29, p<0.001) -0.15 (-0.44 to 0.15, p=0.327)
10 soc Baja Mean (sd) 40.4 (8.2) - -
11 Media Mean (sd) 44.3 (8.6) 3.97 (3.66 to 4.28, p<0.001) 2.07 (1.78 to 2.36, p<0.001)
9 Alta Mean (sd) 48.2 (8.7) 7.80 (7.47 to 8.14, p<0.001) 4.33 (4.00 to 4.65, p<0.001)

ldl

datos %>% ggplot(aes(x=ldl))+geom_histogram(bins=20)+
  theme_minimal()

datos %>% finalfit(dependent = "ldl", explanatory=c("edad","imc", "sexo","hta","fuma","soc"),metrics = TRUE)%>% .[[1]] %>% knitr::kable()
Dependent: ldl unit value Coefficient (univariable) Coefficient (multivariable)
1 edad [18.0,80.0] Mean (sd) 109.5 (27.9) 0.59 (0.56 to 0.62, p<0.001) 0.29 (0.26 to 0.32, p<0.001)
6 imc [16.1,38.2] Mean (sd) 109.5 (27.9) 3.32 (3.21 to 3.43, p<0.001) 1.90 (1.77 to 2.02, p<0.001)
8 sexo Mujer Mean (sd) 111.0 (27.8) - -
7 Hombre Mean (sd) 108.1 (27.9) -2.86 (-3.68 to -2.04, p<0.001) -5.09 (-5.81 to -4.37, p<0.001)
4 hta No Mean (sd) 104.6 (26.2) - -
5 Mean (sd) 130.0 (25.0) 25.36 (24.39 to 26.33, p<0.001) 14.76 (13.80 to 15.72, p<0.001)
2 fuma No Mean (sd) 109.0 (27.8) - -
3 Mean (sd) 111.2 (28.0) 2.16 (1.16 to 3.15, p<0.001) 0.79 (-0.10 to 1.67, p=0.082)
10 soc Baja Mean (sd) 122.1 (26.6) - -
11 Media Mean (sd) 109.8 (26.3) -12.38 (-13.33 to -11.43, p<0.001) -6.19 (-7.07 to -5.31, p<0.001)
9 Alta Mean (sd) 97.7 (26.1) -24.40 (-25.42 to -23.37, p<0.001) -13.17 (-14.17 to -12.17, p<0.001)

imc

datos %>% ggplot(aes(x=imc))+geom_histogram(bins=20)+
  theme_minimal()

datos %>% finalfit(dependent = "imc", explanatory=c("edad", "hdl", "ldl",  "sexo","hta","dm","fuma","soc"),metrics = TRUE) %>% .[[1]] %>% knitr::kable()
Dependent: imc unit value Coefficient (univariable) Coefficient (multivariable)
3 edad [18.0,80.0] Mean (sd) 26.0 (3.3) 0.07 (0.07 to 0.08, p<0.001) 0.04 (0.04 to 0.04, p<0.001)
6 hdl [24.0,70.0] Mean (sd) 26.0 (3.3) -0.14 (-0.15 to -0.14, p<0.001) -0.05 (-0.06 to -0.05, p<0.001)
9 ldl [38.0,212.2] Mean (sd) 26.0 (3.3) 0.05 (0.05 to 0.05, p<0.001) 0.02 (0.02 to 0.02, p<0.001)
11 sexo Mujer Mean (sd) 25.6 (3.3) - -
10 Hombre Mean (sd) 26.3 (3.3) 0.78 (0.68 to 0.87, p<0.001) 0.99 (0.91 to 1.07, p<0.001)
7 hta No Mean (sd) 25.4 (3.2) - -
8 Mean (sd) 28.3 (3.0) 2.92 (2.80 to 3.03, p<0.001) 1.40 (1.29 to 1.51, p<0.001)
1 dm No Mean (sd) 25.8 (3.3) - -
2 Mean (sd) 28.1 (3.2) 2.28 (2.09 to 2.47, p<0.001) 0.84 (0.67 to 1.00, p<0.001)
4 fuma No Mean (sd) 26.1 (3.3) - -
5 Mean (sd) 25.5 (3.3) -0.61 (-0.73 to -0.49, p<0.001) -1.35 (-1.45 to -1.25, p<0.001)
13 soc Baja Mean (sd) 27.5 (3.1) - -
14 Media Mean (sd) 26.0 (3.2) -1.51 (-1.63 to -1.40, p<0.001) -0.75 (-0.86 to -0.65, p<0.001)
12 Alta Mean (sd) 24.5 (3.1) -2.98 (-3.10 to -2.85, p<0.001) -1.57 (-1.69 to -1.46, p<0.001)

hta

datos %>% finalfit(dependent = "hta", explanatory=c("edad",  "imc", "hdl", "ldl",  "sexo","fuma","soc"),metrics = TRUE)%>% .[[1]] %>% knitr::kable()
## Waiting for profiling to be done...
## Waiting for profiling to be done...
## Waiting for profiling to be done...
## Waiting for profiling to be done...
## Waiting for profiling to be done...
## Waiting for profiling to be done...
## Waiting for profiling to be done...
## Waiting for profiling to be done...
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
Dependent: hta No OR (univariable) OR (multivariable)
1 edad Mean (SD) 48.0 (14.3) 56.8 (13.3) 1.05 (1.04-1.05, p<0.001) 1.02 (1.01-1.02, p<0.001)
5 imc Mean (SD) 25.4 (3.2) 28.3 (3.0) 1.35 (1.33-1.37, p<0.001) 1.20 (1.18-1.22, p<0.001)
4 hdl Mean (SD) 45.9 (8.7) 38.3 (7.7) 0.90 (0.89-0.90, p<0.001) 0.95 (0.94-0.95, p<0.001)
6 ldl Mean (SD) 104.6 (26.2) 130.0 (25.0) 1.04 (1.04-1.04, p<0.001) 1.02 (1.02-1.02, p<0.001)
8 sexo Mujer 7320 (82.9) 1512 (17.1) - -
7 Hombre 6982 (78.3) 1936 (21.7) 1.34 (1.25-1.45, p<0.001) 1.31 (1.20-1.43, p<0.001)
2 fuma No 11419 (82.2) 2473 (17.8) - -
3 2883 (74.7) 975 (25.3) 1.56 (1.43-1.70, p<0.001) 1.61 (1.45-1.78, p<0.001)
10 soc Baja 3226 (66.8) 1601 (33.2) - -
11 Media 6239 (82.2) 1349 (17.8) 0.44 (0.40-0.47, p<0.001) 0.79 (0.72-0.87, p<0.001)
9 Alta 4837 (90.7) 498 (9.3) 0.21 (0.19-0.23, p<0.001) 0.72 (0.64-0.82, p<0.001)

dm

datos %>% finalfit(dependent = "dm", explanatory=c("edad",  "imc", "hdl", "ldl",  "sexo","fuma","soc"),metrics = TRUE)%>% .[[1]] %>% knitr::kable()
## Waiting for profiling to be done...
## Waiting for profiling to be done...
## Waiting for profiling to be done...
## Waiting for profiling to be done...
## Waiting for profiling to be done...
## Waiting for profiling to be done...
## Waiting for profiling to be done...
## Waiting for profiling to be done...
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
Dependent: dm No OR (univariable) OR (multivariable)
1 edad Mean (SD) 49.2 (14.5) 56.8 (13.3) 1.04 (1.03-1.04, p<0.001) 1.01 (1.01-1.02, p<0.001)
5 imc Mean (SD) 25.8 (3.3) 28.1 (3.2) 1.23 (1.21-1.26, p<0.001) 1.07 (1.05-1.10, p<0.001)
4 hdl Mean (SD) 44.9 (8.9) 37.9 (7.6) 0.91 (0.90-0.92, p<0.001) 0.95 (0.95-0.96, p<0.001)
6 ldl Mean (SD) 107.9 (27.3) 130.6 (26.4) 1.03 (1.03-1.03, p<0.001) 1.02 (1.01-1.02, p<0.001)
8 sexo Mujer 8303 (94.0) 529 (6.0) - -
7 Hombre 8197 (91.9) 721 (8.1) 1.38 (1.23-1.55, p<0.001) 1.40 (1.23-1.59, p<0.001)
2 fuma No 13021 (93.7) 871 (6.3) - -
3 3479 (90.2) 379 (9.8) 1.63 (1.43-1.85, p<0.001) 1.35 (1.17-1.55, p<0.001)
10 soc Baja 4134 (85.6) 693 (14.4) - -
11 Media 7147 (94.2) 441 (5.8) 0.37 (0.32-0.42, p<0.001) 0.58 (0.51-0.66, p<0.001)
9 Alta 5219 (97.8) 116 (2.2) 0.13 (0.11-0.16, p<0.001) 0.34 (0.27-0.42, p<0.001)

fuma

datos %>% finalfit(dependent = "fuma", explanatory=c("edad",  "imc",  "sexo","soc"),metrics = TRUE) %>% .[[1]] %>% knitr::kable()
## Waiting for profiling to be done...
## Waiting for profiling to be done...
## Waiting for profiling to be done...
## Waiting for profiling to be done...
## Waiting for profiling to be done...
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
Dependent: fuma No OR (univariable) OR (multivariable)
1 edad Mean (SD) 48.8 (14.5) 53.0 (14.3) 1.02 (1.02-1.02, p<0.001) 1.03 (1.03-1.03, p<0.001)
2 imc Mean (SD) 26.1 (3.3) 25.5 (3.3) 0.95 (0.94-0.96, p<0.001) 0.85 (0.84-0.86, p<0.001)
4 sexo Mujer 7565 (85.7) 1267 (14.3) - -
3 Hombre 6327 (70.9) 2591 (29.1) 2.45 (2.27-2.64, p<0.001) 2.95 (2.72-3.19, p<0.001)
6 soc Baja 3464 (71.8) 1363 (28.2) - -
7 Media 5969 (78.7) 1619 (21.3) 0.69 (0.63-0.75, p<0.001) 0.57 (0.52-0.62, p<0.001)
5 Alta 4459 (83.6) 876 (16.4) 0.50 (0.45-0.55, p<0.001) 0.35 (0.31-0.39, p<0.001)
datos %>% 
  ggplot(aes(x=edad))+geom_histogram(bins=20)+
  theme_minimal()

haven::write_sav(datos, "datos.sav")