df=dfWalking %>% left_join(dfBasal) %>% left_join(dfLongitudinal) %>% left_join(dfGIS) %>% select(all_of(names(dfBasal)), everything()) %>% mutate(Nodo=str_sub(Codigo,3,end = 4)) %>% select(Codigo,Nodo,everything()) %>%
  filter(Nodo %in% c("01","18"))
## Joining with `by = join_by(Codigo)`
## Joining with `by = join_by(Codigo, Tiempo)`
## Joining with `by = join_by(Codigo)`

La base de datos en la que se basa este informe está en el siguiente fichero:

df %>%   haven::write_sav("salida/datosAbstract-Malaga.sav")

Descriptiva de la base de datos

El total de participantes en el estudio se almacena en esta base de datos:

dfTransversal=dfBasal %>% left_join(dfGIS) %>% 
  inner_join(df %>% group_by(Codigo,Nodo) %>% summarise(numTiempos=n()) %>% ungroup()) %>% 
  select(Nodo,Codigo,numTiempos,everything())
## Joining with `by = join_by(Codigo)`
## `summarise()` has grouped output by 'Codigo'. You can override using the
## `.groups` argument.
## Joining with `by = join_by(Codigo)`
dfTransversal %>% haven::write_sav("salida/datosAbstract-Malaga-Transversal.sav")
dfTransversal
## # A tibble: 427 × 22
##    Nodo  Codigo numTiempos grupo_int sexo_s1 diab_prev_s1 HOMA_IR escolarizacion
##    <chr> <chr>       <int> <fct>     <fct>   <fct>          <dbl> <fct>         
##  1 01    P2014…          4 B         Hombre  No             NA    Escuela secun…
##  2 01    P2014…          5 B         Mujer   Sí             NA    Escuela secun…
##  3 01    P2012…          7 B         Hombre  No              3.45 Escuela prima…
##  4 01    P2013…          7 B         Hombre  Sí              4.20 Escuela prima…
##  5 01    P2015…          6 A         Hombre  Sí              8.36 Escuela secun…
##  6 01    P2010…          7 B         Hombre  Sí              4.07 Escuela prima…
##  7 01    P2011…          1 B         Hombre  Sí              4.43 Escuela secun…
##  8 01    P2012…          2 B         Hombre  Sí              6.55 Técnico escu…
##  9 01    P2013…          3 A         Mujer   Sí             NA    Escuela secun…
## 10 01    P2010…          6 B         Hombre  Sí             NA    Escuela secun…
## # ℹ 417 more rows
## # ℹ 14 more variables: estadocivil <fct>, local_nh_p <dbl>, local_nh_i <dbl>,
## #   local_dail <dbl>, local_walk <dbl>, NDVI_2015 <dbl>, NDVI_2016 <dbl>,
## #   NDVI_2017 <dbl>, NDVI_2018 <dbl>, NDVI_2019 <dbl>, NDVI_2020 <dbl>,
## #   NDVI_2021 <dbl>, NDVI_2022 <dbl>, NDVI_2023 <dbl>

Usamos tableone para describir las variables más relevantes de esta base de datos:

tableone::CreateTableOne(vars = c("sexo_s1","diab_prev_s1","HOMA_IR","escolarizacion","estadocivil","local_walk"), data = dfTransversal, factorVars = c("sexo_s1","diab_prev_s1","escolarizacion","estadocivil"))
##                                    
##                                     Overall     
##   n                                  427        
##   sexo_s1 = Mujer (%)                214 (50.1) 
##   diab_prev_s1 = Sí (%)             167 (39.1) 
##   HOMA_IR (mean (SD))               5.03 (3.46) 
##   escolarizacion (%)                            
##      Titulado superior o similares    25 ( 5.9) 
##      Técnico escuela universitaria   49 (11.5) 
##      Escuela secundaria o bachiller  147 (34.4) 
##      Escuela primaria                198 (46.4) 
##      No sabe leer ni escribir          8 ( 1.9) 
##   estadocivil (%)                               
##      Soltero/a                        29 ( 6.8) 
##      Casado/a                        303 (71.0) 
##      Viudo/a                          43 (10.1) 
##      Divorciado/a                     34 ( 8.0) 
##      Separado/a                       18 ( 4.2) 
##   local_walk (mean (SD))            0.74 (2.01)
tableone::CreateTableOne(vars = c("Nodo", "diab_prev_s1","HOMA_IR","escolarizacion","estadocivil","local_walk"), data = dfTransversal, factorVars = c("sexo_s1","diab_prev_s1","escolarizacion","estadocivil"),strata = "sexo_s1")
##                                    Stratified by sexo_s1
##                                     Hombre       Mujer        p      test
##   n                                  213          214                    
##   Nodo = 18 (%)                       52 (24.4)    57 (26.6)   0.678     
##   diab_prev_s1 = Sí (%)              96 (45.1)    71 (33.2)   0.016     
##   HOMA_IR (mean (SD))               5.52 (4.24)  4.55 (2.40)   0.007     
##   escolarizacion (%)                                           0.062     
##      Titulado superior o similares    13 ( 6.1)    12 ( 5.6)             
##      Técnico escuela universitaria   30 (14.1)    19 ( 8.9)             
##      Escuela secundaria o bachiller   81 (38.0)    66 (30.8)             
##      Escuela primaria                 87 (40.8)   111 (51.9)             
##      No sabe leer ni escribir          2 ( 0.9)     6 ( 2.8)             
##   estadocivil (%)                                             <0.001     
##      Soltero/a                        13 ( 6.1)    16 ( 7.5)             
##      Casado/a                        171 (80.3)   132 (61.7)             
##      Viudo/a                           6 ( 2.8)    37 (17.3)             
##      Divorciado/a                     14 ( 6.6)    20 ( 9.3)             
##      Separado/a                        9 ( 4.2)     9 ( 4.2)             
##   local_walk (mean (SD))            0.71 (1.92)  0.77 (2.09)   0.790

LINEAR MIXED MODEL:

Exploración.

Esto es solo para tener una idea de la relación entre las variables. No utilizar para nada que no sea tener algo de intuición de qué esperar:

dg=df %>% select(Codigo,sexo_s1,imc,peso,starts_with("n"), starts_with("dur"), starts_with("ENMO"), starts_with("local")) %>% 
                   group_by(Codigo,sexo_s1) %>%
                   summarise_all(mean,na.rm=T) %>% ungroup()
## Warning: There were 854 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `Nodo = (new("standardGeneric", .Data = function (x, ...) ...`.
## ℹ In group 1: `Codigo = "P201001"` and `sexo_s1 = Hombre`.
## Caused by warning in `mean.default()`:
## ! argument is not numeric or logical: returning NA
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 853 remaining warnings.
dg   %>% ggplot( aes(y=n_wb5_01_90_3m_i2m_i25_daily, x = local_walk, color=sexo_s1)) + geom_point(alpha=0.5)+geom_smooth(method="lm")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 39 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 39 rows containing missing values (`geom_point()`).

dg   %>% ggplot( aes(y=dur_wb5_01_90_3m_i2m_i25_daily, x = local_walk, color=sexo_s1)) + geom_point(alpha=0.5)+geom_smooth(method="lm")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 39 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 39 rows containing missing values (`geom_point()`).

dg   %>% ggplot( aes(y=ENMO_mean_wb5_01_90_3m_i2m_i25_daily, x = local_walk, color=sexo_s1)) + geom_point(alpha=0.5)+geom_smooth(method="lm")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 40 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 40 rows containing missing values (`geom_point()`).

## Base longitudinal de solo la base de datos con datos longitudinales recogidos

Esta base longitudinal la creamos y almacenamos para chequear que no hay errores. Se imputan los valores de SF36Dim_SF mediante LOCF por que parece que no se mide siempre.

dfLongNoMissing=
  df %>% select(Codigo,sexo_s1,grupo_int,Tiempo, n_wb5_01_90_3m_i2m_i25_daily,local_walk, imc,peso , SF36Dim_SF, dur_wb5_01_90_3m_i2m_i25_daily,ENMO_mean_wb5_01_90_3m_i2m_i25_daily) %>%
  arrange(Codigo,Tiempo) %>%
  #quedarsos solo con las filas completas
  filter(complete.cases(.[,1:8])) %>%
  mutate(TiempoCuali=as.factor(Tiempo))



dfLongNoMissing=dfLongNoMissing %>% group_by(Codigo) %>%   mutate(SF36Dim_SF=zoo::na.locf0(SF36Dim_SF)) %>% ungroup()
dfLongNoMissing %>%   haven::write_sav("salida/abstract-Longitudinal-NoMissing.sav")

Vamos a describirla con tableone:

tableone::CreateTableOne(vars = c("Tiempo", "TiempoCuali", "n_wb5_01_90_3m_i2m_i25_daily", "dur_wb5_01_90_3m_i2m_i25_daily","imc","SF36Dim_SF", "local_walk"), 
                                             data = dfLongNoMissing)
##                                             
##                                              Overall      
##   n                                           2091        
##   Tiempo (mean (SD))                          3.05 (2.31) 
##   TiempoCuali (%)                                         
##      0                                         291 (13.9) 
##      0.5                                       182 ( 8.7) 
##      1                                         224 (10.7) 
##      2                                         268 (12.8) 
##      3                                         238 (11.4) 
##      4                                         232 (11.1) 
##      5                                         247 (11.8) 
##      6                                         216 (10.3) 
##      7                                         193 ( 9.2) 
##   n_wb5_01_90_3m_i2m_i25_daily (mean (SD))    1.77 (1.25) 
##   dur_wb5_01_90_3m_i2m_i25_daily (mean (SD)) 48.22 (44.26)
##   imc (mean (SD))                            31.84 (3.84) 
##   SF36Dim_SF (mean (SD))                     86.14 (21.99)
##   local_walk (mean (SD))                      0.54 (1.95)

Los resultados usandomodelos lineales mixtos donde además de una fuente de error aleatorio por visita existe otra fuente de error aleatorio debido al propio paciente (1|Codigo):

variable="n_wb5_01_90_3m_i2m_i25_daily"
modelotxt=sprintf("%s ~ local_walk  + SF36Dim_SF + imc + sexo_s1+ Tiempo + grupo_int+(1|Codigo)",variable)  
cat (modelotxt)
## n_wb5_01_90_3m_i2m_i25_daily ~ local_walk  + SF36Dim_SF + imc + sexo_s1+ Tiempo + grupo_int+(1|Codigo)
modelo=lmer(formula(modelotxt),data=dfLongNoMissing)
modelsummary(modelo, stars = TRUE,conf_level = 0.95,statistic = c("p.value","conf.int"))
(1)
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) 2.730***
(<0.001)
[2.065, 3.394]
local_walk 0.071**
(0.004)
[0.023, 0.119]
SF36Dim_SF 0.005***
(<0.001)
[0.003, 0.008]
imc -0.038***
(<0.001)
[-0.057, -0.019]
sexo_s1Mujer -0.446***
(<0.001)
[-0.638, -0.253]
Tiempo -0.052***
(<0.001)
[-0.068, -0.036]
grupo_intB 0.151
(0.124)
[-0.041, 0.344]
SD (Intercept Codigo) 0.881
SD (Observations) 0.746
Num.Obs. 2015
R2 Marg. 0.098
R2 Cond. 0.623
AIC 5363.3
BIC 5413.7
ICC 0.6
RMSE 0.68
variable="dur_wb5_01_90_3m_i2m_i25_daily"
modelotxt=sprintf("%s ~ local_walk  + SF36Dim_SF + imc + sexo_s1+ Tiempo + grupo_int+(1|Codigo)",variable)  
cat (modelotxt)
## dur_wb5_01_90_3m_i2m_i25_daily ~ local_walk  + SF36Dim_SF + imc + sexo_s1+ Tiempo + grupo_int+(1|Codigo)
modelo=lmer(formula(modelotxt),data=dfLongNoMissing)
modelsummary(modelo, stars = TRUE,conf_level = 0.95,statistic = c("p.value","conf.int"))
(1)
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) 99.726***
(<0.001)
[76.671, 122.781]
local_walk 2.359**
(0.007)
[0.631, 4.088]
SF36Dim_SF 0.114**
(0.004)
[0.037, 0.191]
imc -1.823***
(<0.001)
[-2.470, -1.177]
sexo_s1Mujer -17.239***
(<0.001)
[-24.141, -10.338]
Tiempo -0.351
(0.200)
[-0.889, 0.186]
grupo_intB 6.817+
(0.053)
[-0.079, 13.714]
SD (Intercept Codigo) 31.832
SD (Observations) 25.188
Num.Obs. 2015
R2 Marg. 0.098
R2 Cond. 0.653
AIC 19542.4
BIC 19592.9
ICC 0.6
RMSE 23.01
variable="ENMO_mean_wb5_01_90_3m_i2m_i25_daily"
modelotxt=sprintf("%s ~ local_walk  + SF36Dim_SF + imc + sexo_s1+ Tiempo + grupo_int+(1|Codigo)",variable)  
cat (modelotxt)
## ENMO_mean_wb5_01_90_3m_i2m_i25_daily ~ local_walk  + SF36Dim_SF + imc + sexo_s1+ Tiempo + grupo_int+(1|Codigo)
modelo=lmer(formula(modelotxt),data=dfLongNoMissing)
modelsummary(modelo, stars = TRUE,conf_level = 0.95,statistic = c("p.value","conf.int"))
(1)
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) 112.469***
(<0.001)
[85.115, 139.823]
local_walk 1.802*
(0.019)
[0.294, 3.310]
SF36Dim_SF 0.020
(0.737)
[-0.095, 0.135]
imc -0.497
(0.190)
[-1.239, 0.246]
sexo_s1Mujer -10.645***
(<0.001)
[-16.657, -4.633]
Tiempo -0.995*
(0.037)
[-1.931, -0.059]
grupo_intB 1.924
(0.526)
[-4.019, 7.867]
SD (Intercept Codigo) 20.446
SD (Observations) 44.995
Num.Obs. 1935
R2 Marg. 0.021
R2 Cond. 0.189
AIC 20487.3
BIC 20537.4
ICC 0.2
RMSE 42.76

Vamos a ver el efecto de walking sobre el IMC y el peso

modelotxt=sprintf("imc ~ %s + dur_wb5_01_90_3m_i2m_i25_daily+n_wb5_01_90_3m_i2m_i25_daily+ ENMO_mean_wb5_01_90_3m_i2m_i25_daily+ sexo_s1+ Tiempo+grupo_int+(1|Codigo)",variable)  

modelo=lmer(formula(modelotxt),data=dfLongNoMissing)
modelsummary(modelo, stars = TRUE,conf_level = 0.95,statistic = c("p.value","conf.int"))
(1)
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) 32.733***
(<0.001)
[32.062, 33.403]
ENMO_mean_wb5_01_90_3m_i2m_i25_daily -0.002*
(0.045)
[-0.003, 0.000]
dur_wb5_01_90_3m_i2m_i25_daily -0.007***
(<0.001)
[-0.010, -0.003]
n_wb5_01_90_3m_i2m_i25_daily 0.042
(0.484)
[-0.075, 0.158]
sexo_s1Mujer 0.010
(0.978)
[-0.703, 0.723]
Tiempo -0.040**
(0.009)
[-0.071, -0.010]
grupo_intB -0.258
(0.477)
[-0.970, 0.454]
SD (Intercept Codigo) 3.490
SD (Observations) 1.376
Num.Obs. 2009
R2 Marg. 0.007
R2 Cond. 0.866
AIC 8344.8
BIC 8395.2
ICC 0.9
RMSE 1.24
modelotxt=sprintf("peso ~ %s + dur_wb5_01_90_3m_i2m_i25_daily+n_wb5_01_90_3m_i2m_i25_daily+ ENMO_mean_wb5_01_90_3m_i2m_i25_daily+ sexo_s1+ Tiempo+grupo_int+(1|Codigo)",variable)  

modelo=lmer(formula(modelotxt),data=dfLongNoMissing)
modelsummary(modelo, stars = TRUE,conf_level = 0.95,statistic = c("p.value","conf.int"))
(1)
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) 92.746***
(<0.001)
[90.685, 94.807]
ENMO_mean_wb5_01_90_3m_i2m_i25_daily -0.003+
(0.093)
[-0.007, 0.001]
dur_wb5_01_90_3m_i2m_i25_daily -0.018***
(<0.001)
[-0.026, -0.009]
n_wb5_01_90_3m_i2m_i25_daily 0.138
(0.351)
[-0.152, 0.427]
sexo_s1Mujer -12.689***
(<0.001)
[-14.941, -10.437]
Tiempo -0.237***
(<0.001)
[-0.312, -0.162]
grupo_intB -1.400
(0.223)
[-3.651, 0.851]
SD (Intercept Codigo) 11.119
SD (Observations) 3.398
Num.Obs. 2009
R2 Marg. 0.227
R2 Cond. 0.934
AIC 12153.0
BIC 12203.5
ICC 0.9
RMSE 3.06