Carga de librerías. Deben estar previamente instaladas

library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(table1)
## 
## Adjuntando el paquete: 'table1'
## 
## The following objects are masked from 'package:base':
## 
##     units, units<-
library(psych)
## 
## Adjuntando el paquete: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(dplyr)
library(tidyr)
library(car)
## Cargando paquete requerido: carData
## 
## Adjuntando el paquete: 'car'
## 
## The following object is masked from 'package:psych':
## 
##     logit
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
library(epiR)
## Cargando paquete requerido: survival
## Package epiR 2.0.77 is loaded
## Type help(epi.about) for summary information
## Type browseVignettes(package = 'epiR') to learn how to use epiR for applied epidemiological analyses
library(expss)
## Cargando paquete requerido: maditr
## 
## To get total summary skip 'by' argument: take_all(mtcars, mean)
## 
## 
## Adjuntando el paquete: 'maditr'
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, coalesce, first, last
## 
## The following object is masked from 'package:purrr':
## 
##     transpose
## 
## The following object is masked from 'package:readr':
## 
##     cols
## 
## 
## Adjuntando el paquete: 'expss'
## 
## The following object is masked from 'package:car':
## 
##     recode
## 
## The following objects are masked from 'package:stringr':
## 
##     fixed, regex
## 
## The following objects are masked from 'package:dplyr':
## 
##     compute, contains, na_if, recode, vars, where
## 
## The following objects are masked from 'package:purrr':
## 
##     keep, modify, modify_if, when
## 
## The following objects are masked from 'package:tidyr':
## 
##     contains, nest
## 
## The following object is masked from 'package:ggplot2':
## 
##     vars
library(descr)
library(ggplot2)
library(tableone)
# Session-wide printing options: scipen = 999 heavily penalises scientific
# notation, digits = 3 sets the number of significant digits shown, and
# encoding sets the default encoding option to UTF-8.
options(scipen = 999, digits = 3, encoding = 'UTF-8')   

Carga del dataset. También se puede realizar mediante File, Import dataset, from excel.

# Read the Framingham workbook with read_excel() and keep it as Framingham_1_.
# NOTE(review): this absolute path points at one user's Downloads folder, so
# the script only runs on that machine — consider a project-relative path.
Framingham_1_ <- read_excel("C:/Users/Administrador/Downloads/Framingham (1).xlsx")

Calculen la tasa de mortalidad global DEATH de la población total. Previamente ponemos en minúscula los nombres de las variables para agilizar la escritura. Luego pasamos a factor las variables CHR

# Overall mortality (%): share of participants whose DEATH indicator is 1.
# mean(..., na.rm = TRUE) keeps numerator and denominator consistent: a row
# with a missing DEATH value is left out of both, whereas sum(na.rm = TRUE)
# divided by nrow() dropped it from the deaths but still counted it in the
# population.
mortalidad_global <- mean(Framingham_1_$DEATH, na.rm = TRUE) * 100
mortalidad_global
## [1] 35
# Lower-case every column name, then turn the categorical indicator columns
# into factors (same end result as converting first and renaming afterwards).
Framingham_1_ <- Framingham_1_ %>%
  rename_with(tolower) %>%
  mutate(
    across(
      c(
        cursmoke, sex, diabetes, prevchd, prevstrk, prevhyp,
        death, angina, hospmi, anychd, stroke
      ),
      as.factor
    )
  )

Armen una Tabla 1 para las variables SEX, AGE, SYSBP, CURSMOKE, CIGPDAY, BMI, DIABETES, PREVCHD, PREVSTRK, PREVHYP agrupando por DEATH Sí – No. Para ello evalúen la distribución de las variables continuas con testeos y gráficos para definir qué medida de resumen le corresponde a cada una.

# Age: the distribution is treated as normal.
# NOTE(review): the Shapiro-Wilk test below rejects normality (n = 4434), so
# the "normal" call presumably rests on the plots and the small skew (0.19)
# reported by describe() — confirm this is the intended criterion.
# Histogram of age.
hist(Framingham_1_$age)

# Normal Q-Q plot of age with its reference line.
qqnorm(Framingham_1_$age)
qqline(Framingham_1_$age)

# Summary statistics (mean, sd, median, skew, kurtosis, ...).
describe(Framingham_1_$age)
##    vars    n mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 4434 49.9 8.68     49    49.7 10.4  32  70    38 0.19    -1.03 0.13
# Formal normality test.
shapiro.test(Framingham_1_$age)
## 
##  Shapiro-Wilk normality test
## 
## data:  Framingham_1_$age
## W = 1, p-value <0.0000000000000002
# Systolic blood pressure: the distribution is treated as normal.
# NOTE(review): describe() reports skew 1.15 and a mean (133) above the median
# (129), and Shapiro-Wilk rejects normality; the "normal" call is a judgement
# based on the plots — confirm, since median-based summaries may fit better.
# Histogram of sysbp.
hist(Framingham_1_$sysbp)

# Normal Q-Q plot of sysbp with its reference line.
qqnorm(Framingham_1_$sysbp)
qqline(Framingham_1_$sysbp)

# Summary statistics (mean, sd, median, skew, kurtosis, ...).
describe(Framingham_1_$sysbp)
##    vars    n mean   sd median trimmed  mad  min max range skew kurtosis   se
## X1    1 4434  133 22.4    129     131 19.3 83.5 295   212 1.15     2.08 0.34
# Formal normality test.
shapiro.test(Framingham_1_$sysbp)
## 
##  Shapiro-Wilk normality test
## 
## data:  Framingham_1_$sysbp
## W = 0.9, p-value <0.0000000000000002
# Cigarettes per day: the distribution is treated as non-normal
# (describe() shows a median of 0 against a mean of 8.97, skew 1.26).
# Histogram of cigpday.
hist(Framingham_1_$cigpday)

# Normal Q-Q plot of cigpday with its reference line.
qqnorm(Framingham_1_$cigpday)
qqline(Framingham_1_$cigpday)

# Summary statistics (mean, sd, median, skew, kurtosis, ...).
describe(Framingham_1_$cigpday)
##    vars    n mean   sd median trimmed mad min max range skew kurtosis   se
## X1    1 4402 8.97 11.9      0    6.84   0   0  70    70 1.26     1.07 0.18
# Formal normality test.
shapiro.test(Framingham_1_$cigpday)
## 
##  Shapiro-Wilk normality test
## 
## data:  Framingham_1_$cigpday
## W = 0.8, p-value <0.0000000000000002
# Body mass index: the distribution is treated as normal.
# NOTE(review): describe() reports skew 0.98 and kurtosis 2.6, and
# Shapiro-Wilk rejects normality; the "normal" call is a judgement based on
# the plots — confirm.
# Histogram of bmi.
hist(Framingham_1_$bmi)

# Normal Q-Q plot of bmi with its reference line.
qqnorm(Framingham_1_$bmi)
qqline(Framingham_1_$bmi)

# Summary statistics (mean, sd, median, skew, kurtosis, ...).
describe(Framingham_1_$bmi)
##    vars    n mean  sd median trimmed  mad  min  max range skew kurtosis   se
## X1    1 4415 25.9 4.1   25.4    25.6 3.68 15.5 56.8  41.3 0.98      2.6 0.06
# Formal normality test.
shapiro.test(Framingham_1_$bmi)
## 
##  Shapiro-Wilk normality test
## 
## data:  Framingham_1_$bmi
## W = 1, p-value <0.0000000000000002
# Table 1: baseline variables summarised within each level of death
table1(
  ~ sex + age + sysbp + cursmoke + cigpday + bmi +
    diabetes + prevchd + prevstrk + prevhyp | death,
  data = Framingham_1_
)
0
(N=2884)
1
(N=1550)
Overall
(N=4434)
sex
0 1783 (61.8%) 707 (45.6%) 2490 (56.2%)
1 1101 (38.2%) 843 (54.4%) 1944 (43.8%)
age
Mean (SD) 47.2 (7.67) 54.9 (8.22) 49.9 (8.68)
Median [Min, Max] 46.0 [32.0, 69.0] 56.0 [34.0, 70.0] 49.0 [32.0, 70.0]
sysbp
Mean (SD) 128 (18.5) 142 (25.8) 133 (22.4)
Median [Min, Max] 125 [85.0, 243] 138 [83.5, 295] 129 [83.5, 295]
cursmoke
0 1491 (51.7%) 762 (49.2%) 2253 (50.8%)
1 1393 (48.3%) 788 (50.8%) 2181 (49.2%)
cigpday
Mean (SD) 8.44 (11.4) 9.95 (12.8) 8.97 (11.9)
Median [Min, Max] 0 [0, 70.0] 1.00 [0, 60.0] 0 [0, 70.0]
Missing 22 (0.8%) 10 (0.6%) 32 (0.7%)
bmi
Mean (SD) 25.5 (3.91) 26.4 (4.38) 25.8 (4.10)
Median [Min, Max] 25.1 [16.6, 56.8] 26.1 [15.5, 51.3] 25.5 [15.5, 56.8]
Missing 6 (0.2%) 13 (0.8%) 19 (0.4%)
diabetes
0 2857 (99.1%) 1456 (93.9%) 4313 (97.3%)
1 27 (0.9%) 94 (6.1%) 121 (2.7%)
prevchd
0 2834 (98.3%) 1406 (90.7%) 4240 (95.6%)
1 50 (1.7%) 144 (9.3%) 194 (4.4%)
prevstrk
0 2877 (99.8%) 1525 (98.4%) 4402 (99.3%)
1 7 (0.2%) 25 (1.6%) 32 (0.7%)
prevhyp
0 2212 (76.7%) 792 (51.1%) 3004 (67.7%)
1 672 (23.3%) 758 (48.9%) 1430 (32.3%)

Analicen la relación entre presión arterial sistólica, como variable continua, y DEATH. Interpreten sus resultados.

Pista: Piensen qué tipo de variables tienen para elegir el test correspondiente

# Inspect systolic pressure within each death group (1 / 0) to choose between
# a t test and a Wilcoxon test.
hist(Framingham_1_$sysbp[Framingham_1_$death == 1])

hist(Framingham_1_$sysbp[Framingham_1_$death == 0])

# Each qqline() must come right after its own qqnorm(): qqline() draws on the
# plot that is currently open, so issuing both qqline() calls after the second
# qqnorm() left the death == 1 plot without a reference line and put two lines
# on the death == 0 plot.
qqnorm(Framingham_1_$sysbp[Framingham_1_$death == 1])
qqline(Framingham_1_$sysbp[Framingham_1_$death == 1])

qqnorm(Framingham_1_$sysbp[Framingham_1_$death == 0])
qqline(Framingham_1_$sysbp[Framingham_1_$death == 0])

# Descriptive statistics of systolic pressure per death group.
describeBy(Framingham_1_$sysbp, Framingham_1_$death)
## 
##  Descriptive statistics by group 
## group: 0
##    vars    n mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 2884  128 18.5    125     126 16.3  85 243   158 1.02     1.75 0.35
## ------------------------------------------------------------ 
## group: 1
##    vars    n mean   sd median trimmed  mad  min max range skew kurtosis   se
## X1    1 1550  142 25.8    138     140 23.7 83.5 295   212 0.89     1.17 0.66
# Shapiro-Wilk normality test of systolic pressure among death == 0.
shapiro.test(Framingham_1_$sysbp[Framingham_1_$death==0])
## 
##  Shapiro-Wilk normality test
## 
## data:  Framingham_1_$sysbp[Framingham_1_$death == 0]
## W = 0.9, p-value <0.0000000000000002
# Shapiro-Wilk normality test of systolic pressure among death == 1.
shapiro.test(Framingham_1_$sysbp[Framingham_1_$death==1])
## 
##  Shapiro-Wilk normality test
## 
## data:  Framingham_1_$sysbp[Framingham_1_$death == 1]
## W = 1, p-value <0.0000000000000002
# The variable is treated as normally distributed in both strata, so a t test
# is used. NOTE(review): both Shapiro-Wilk tests above reject normality; the
# decision presumably relies on the plots and the large group sizes — confirm.
# With the formula interface and default arguments, t.test() runs Welch's
# two-sample test (unequal variances) comparing sysbp between death groups.
t.test(Framingham_1_$sysbp~Framingham_1_$death)
## 
##  Welch Two Sample t-test
## 
## data:  Framingham_1_$sysbp by Framingham_1_$death
## t = -19, df = 2426, p-value <0.0000000000000002
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -15.7 -12.8
## sample estimates:
## mean in group 0 mean in group 1 
##             128             142
#Se halla una diferencia estadísticamente significativa entre la presión sistólica de pacientes que fallecieron y de los que no fallecieron

Dicotomicen la variable presión arterial sistólica (variable SYSBP) en menores de 140 (normotensos) y mayores o iguales a 140 mmHg (hipertensos), llámenla CATHYP. Vuelvan a analizar la relación entre hipertensión arterial ahora como variable dicotómica y el evento DEATH. Reporten e interpreten la magnitud de la asociación (con intervalos de confianza) en caso de que exista una asociación entre estas dos variables.

Pista: Piensen si DEATH es incidencia o prevalencia…

# DEATH is an incidence measure because the data come from a cohort.
# Build cathyp by splitting sysbp at 140: values of 140 or more are
# "hipertensos", lower values are "normotensos"; a missing sysbp stays NA.
Framingham_1_ <- Framingham_1_ %>%
  mutate(
    cathyp = factor(
      if_else(sysbp >= 140, "hipertensos", "normotensos")
    )
  )
# Reorder the factor levels so the 2x2 table reads better and the OR is
# easier to interpret.
# Levels before reordering.
levels(Framingham_1_$cathyp)
## [1] "hipertensos" "normotensos"
levels(Framingham_1_$death)
## [1] "0" "1"
# Put the event (death = "1") first so it becomes the first table column.
Framingham_1_ <- Framingham_1_ %>% mutate(death = factor(death, levels = c("1", "0")))
# Put the exposed group ("hipertensos") first so it becomes the first table row.
Framingham_1_ <- Framingham_1_ %>% mutate(cathyp = factor(cathyp, levels = c("hipertensos", "normotensos")))

# Levels after reordering.
levels(Framingham_1_$cathyp)
## [1] "hipertensos" "normotensos"
levels(Framingham_1_$death)
## [1] "1" "0"
# Cross-tabulate hypertension category (rows) against death (columns).
tabla_cruzada <- table(Framingham_1_$cathyp,Framingham_1_$death)
tabla_cruzada
##              
##                  1    0
##   hipertensos  731  650
##   normotensos  819 2234
# Apply the chi-squared and Fisher tests to relate cathyp and death. The two
# variables are associated; the chi-squared result is the one reported.
# crosstab() is asked for row and column percentages, expected counts, the
# chi-squared test and Fisher's exact test.
tabla2x2 <- crosstab(Framingham_1_$cathyp,Framingham_1_$death, prop.r = TRUE, prop.c = TRUE, chisq = TRUE, fisher = TRUE, expected = TRUE)

# Print the table together with the requested statistics.
tabla2x2
##    Cell Contents 
## |-------------------------|
## |                   Count | 
## |         Expected Values | 
## |             Row Percent | 
## |          Column Percent | 
## |-------------------------|
## 
## =============================================
##                         Framingham_1_$death
## Framingham_1_$cathyp        1       0   Total
## ---------------------------------------------
## hipertensos              731     650    1381 
##                          483     898         
##                         52.9%   47.1%   31.1%
##                         47.2%   22.5%        
## ---------------------------------------------
## normotensos              819    2234    3053 
##                         1067    1986         
##                         26.8%   73.2%   68.9%
##                         52.8%   77.5%        
## ---------------------------------------------
## Total                   1550    2884    4434 
##                           35%     65%        
## =============================================
## 
## Statistics for All Table Factors
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 = 285      d.f. = 1      p <0.0000000000000002 
## 
## Pearson's Chi-squared test with Yates' continuity correction 
## ------------------------------------------------------------
## Chi^2 = 284      d.f. = 1      p <0.0000000000000002 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Sample estimate odds ratio: 3.07 
## 
## Alternative hypothesis: true odds ratio is not equal to 1 
## p <0.0000000000000002 
## 95% confidence interval: 2.68 3.51 
## 
## Alternative hypothesis: true odds ratio is less than 1 
## p = 1 
## 95%s confidence interval: % 0 3.44 
## 
## Alternative hypothesis: true odds ratio is greater than 1 
## p <0.0000000000000002 
## 95%s confidence interval: % 2.74 Inf 
## 
##         Minimum expected frequency: 483
# Measures of association for cathyp (rows: exposed first) versus death
# (columns: event first), reusing the cross-table built above.
tabla_cruzada[c("hipertensos", "normotensos"), c("1", "0")] %>%
  epi.2by2(method = "cohort.count", conf.level = 0.95, outcome = "as.columns")
##              Outcome +    Outcome -      Total                 Inc risk *
## Exposed +          731          650       1381     52.93 (50.26 to 55.59)
## Exposed -          819         2234       3053     26.83 (25.26 to 28.44)
## Total             1550         2884       4434     34.96 (33.55 to 36.38)
## 
## Point estimates and 95% CIs:
## -------------------------------------------------------------------
## Inc risk ratio                                 1.97 (1.83, 2.13)
## Inc odds ratio                                 3.07 (2.69, 3.50)
## Attrib risk in the exposed *                   26.11 (23.04, 29.17)
## Attrib fraction in the exposed (%)            49.32 (45.27, 53.07)
## Attrib risk in the population *                8.13 (6.02, 10.24)
## Attrib fraction in the population (%)         23.26 (20.32, 26.10)
## -------------------------------------------------------------------
## Uncorrected chi2 test that OR = 1: chi2(1) = 285.029 Pr>chi2 = <0.001
## Fisher exact test that OR = 1: Pr>chi2 = <0.001
##  Wald confidence limits
##  CI: confidence interval
##  * Outcomes per 100 population units
#El riesgo de morir de los hipertensos es 1.97 veces el de los normotensos (riesgo relativo; IC 95%: 1.83 a 2.13)

Evalúen si la variable que consigna CURSMOKE se comporta como confundidor y/o modificador de efecto en la relación entre hipertensión arterial como variable dicotómica CATHYP (variable que dicotomizaron en el punto 3, ojo, no es PREVHYP) y DEATH. Justifiquen su respuesta.

Pista: Si estamos agregando una segunda variable para explicar la relación entre un factor de riesgo y un evento ¿qué test podemos usar?

# To assess confounding, explore cursmoke vs cathyp and cursmoke vs death, to
# see whether smoking is related to both the exposure and the outcome.
# cathyp vs death relationship.
# NOTE(review): the table passed here has cursmoke as a third dimension, so
# this call reports crude and Mantel-Haenszel estimates; the same call is
# issued again later for the Mantel-Haenszel step — confirm whether a plain
# two-way cathyp x death table was intended here.
epi.2by2(table(Framingham_1_$cathyp,Framingham_1_$death,Framingham_1_$cursmoke)[c("hipertensos","normotensos"),c("1","0"),c("1","0")], method = "cohort.count", conf.level = 0.95, outcome = "as.columns")
##              Outcome +    Outcome -      Total                 Inc risk *
## Exposed +          731          650       1381     52.93 (50.26 to 55.59)
## Exposed -          819         2234       3053     26.83 (25.26 to 28.44)
## Total             1550         2884       4434     34.96 (33.55 to 36.38)
## 
## 
## Point estimates and 95% CIs:
## -------------------------------------------------------------------
## Inc risk ratio (crude)                         1.97 (1.83, 2.13)
## Inc risk ratio (M-H)                           2.02 (1.86, 2.18)
## Inc risk ratio (crude:M-H)                     0.98
## Inc odds ratio (crude)                         3.07 (2.69, 3.50)
## Inc odds ratio (M-H)                           3.18 (2.78, 3.64)
## Inc odds ratio (crude:M-H)                     0.96
## Attrib risk in the exposed (crude) *           26.11 (23.04, 29.17)
## Attrib risk in the exposed (M-H) *             26.77 (21.67, 31.88)
## Attrib risk (crude:M-H)                        0.98
## -------------------------------------------------------------------
##  M-H test of homogeneity of IRRs: chi2(1) = 1.834 Pr>chi2 = 0.176
##  M-H test of homogeneity of ORs: chi2(1) = 0.280 Pr>chi2 = 0.597
##  Test that M-H adjusted OR = 1:  chi2(1) = 296.106 Pr>chi2 = <0.001
##  Wald confidence limits
##  M-H: Mantel-Haenszel; CI: confidence interval
##  * Outcomes per 100 population units
# cursmoke vs cathyp: smoking status in rows, hypertension category in columns
table(Framingham_1_$cursmoke, Framingham_1_$cathyp)[
  c("1", "0"),
  c("hipertensos", "normotensos")
] %>%
  epi.2by2(method = "cohort.count", conf.level = 0.95, outcome = "as.columns")
##              Outcome +    Outcome -      Total                 Inc risk *
## Exposed +          558         1623       2181     25.58 (23.76 to 27.47)
## Exposed -          823         1430       2253     36.53 (34.54 to 38.56)
## Total             1381         3053       4434     31.15 (29.78 to 32.53)
## 
## Point estimates and 95% CIs:
## -------------------------------------------------------------------
## Inc risk ratio                                 0.70 (0.64, 0.77)
## Inc odds ratio                                 0.60 (0.53, 0.68)
## Attrib risk in the exposed *                   -10.94 (-13.65, -8.24)
## Attrib fraction in the exposed (%)            -42.78 (-56.21, -30.50)
## Attrib risk in the population *                -5.38 (-7.79, -2.97)
## Attrib fraction in the population (%)         -17.28 (-21.67, -13.06)
## -------------------------------------------------------------------
## Uncorrected chi2 test that OR = 1: chi2(1) = 61.899 Pr>chi2 = <0.001
## Fisher exact test that OR = 1: Pr>chi2 = <0.001
##  Wald confidence limits
##  CI: confidence interval
##  * Outcomes per 100 population units
# cursmoke vs death: smoking status in rows, death in columns
table(Framingham_1_$cursmoke, Framingham_1_$death)[
  c("1", "0"),
  c("1", "0")
] %>%
  epi.2by2(method = "cohort.count", conf.level = 0.95, outcome = "as.columns")
##              Outcome +    Outcome -      Total                 Inc risk *
## Exposed +          788         1393       2181     36.13 (34.11 to 38.19)
## Exposed -          762         1491       2253     33.82 (31.87 to 35.82)
## Total             1550         2884       4434     34.96 (33.55 to 36.38)
## 
## Point estimates and 95% CIs:
## -------------------------------------------------------------------
## Inc risk ratio                                 1.07 (0.99, 1.16)
## Inc odds ratio                                 1.11 (0.98, 1.25)
## Attrib risk in the exposed *                   2.31 (-0.50, 5.12)
## Attrib fraction in the exposed (%)            6.39 (-1.44, 13.61)
## Attrib risk in the population *                1.14 (-1.27, 3.54)
## Attrib fraction in the population (%)         3.25 (-0.78, 7.12)
## -------------------------------------------------------------------
## Uncorrected chi2 test that OR = 1: chi2(1) = 2.598 Pr>chi2 = 0.107
## Fisher exact test that OR = 1: Pr>chi2 = 0.108
##  Wald confidence limits
##  CI: confidence interval
##  * Outcomes per 100 population units
# Finally, run the Mantel-Haenszel analysis to judge whether being a smoker
# (cursmoke) confounds cathyp vs death: the third table dimension is the
# stratifying variable.
table(
  Framingham_1_$cathyp,
  Framingham_1_$death,
  Framingham_1_$cursmoke
)[
  c("hipertensos", "normotensos"),
  c("1", "0"),
  c("1", "0")
] %>%
  epi.2by2(method = "cohort.count", conf.level = 0.95, outcome = "as.columns")
##              Outcome +    Outcome -      Total                 Inc risk *
## Exposed +          731          650       1381     52.93 (50.26 to 55.59)
## Exposed -          819         2234       3053     26.83 (25.26 to 28.44)
## Total             1550         2884       4434     34.96 (33.55 to 36.38)
## 
## 
## Point estimates and 95% CIs:
## -------------------------------------------------------------------
## Inc risk ratio (crude)                         1.97 (1.83, 2.13)
## Inc risk ratio (M-H)                           2.02 (1.86, 2.18)
## Inc risk ratio (crude:M-H)                     0.98
## Inc odds ratio (crude)                         3.07 (2.69, 3.50)
## Inc odds ratio (M-H)                           3.18 (2.78, 3.64)
## Inc odds ratio (crude:M-H)                     0.96
## Attrib risk in the exposed (crude) *           26.11 (23.04, 29.17)
## Attrib risk in the exposed (M-H) *             26.77 (21.67, 31.88)
## Attrib risk (crude:M-H)                        0.98
## -------------------------------------------------------------------
##  M-H test of homogeneity of IRRs: chi2(1) = 1.834 Pr>chi2 = 0.176
##  M-H test of homogeneity of ORs: chi2(1) = 0.280 Pr>chi2 = 0.597
##  Test that M-H adjusted OR = 1:  chi2(1) = 296.106 Pr>chi2 = <0.001
##  Wald confidence limits
##  M-H: Mantel-Haenszel; CI: confidence interval
##  * Outcomes per 100 population units
# To assess effect modification, analyse hypertension (cathyp) vs death
# within each smoking stratum (cursmoke).
# Smoker stratum (cursmoke == 1):
table(
  Framingham_1_$cathyp[Framingham_1_$cursmoke == 1],
  Framingham_1_$death[Framingham_1_$cursmoke == 1]
)[
  c("hipertensos", "normotensos"),
  c("1", "0")
] %>%
  epi.2by2(method = "cohort.count", conf.level = 0.95, outcome = "as.columns")
##              Outcome +    Outcome -      Total                 Inc risk *
## Exposed +          312          246        558     55.91 (51.68 to 60.08)
## Exposed -          476         1147       1623     29.33 (27.12 to 31.61)
## Total              788         1393       2181     36.13 (34.11 to 38.19)
## 
## Point estimates and 95% CIs:
## -------------------------------------------------------------------
## Inc risk ratio                                 1.91 (1.72, 2.12)
## Inc odds ratio                                 3.06 (2.51, 3.73)
## Attrib risk in the exposed *                   26.59 (21.91, 31.26)
## Attrib fraction in the exposed (%)            47.55 (41.71, 52.80)
## Attrib risk in the population *                6.80 (3.81, 9.80)
## Attrib fraction in the population (%)         18.83 (15.18, 22.31)
## -------------------------------------------------------------------
## Uncorrected chi2 test that OR = 1: chi2(1) = 127.181 Pr>chi2 = <0.001
## Fisher exact test that OR = 1: Pr>chi2 = <0.001
##  Wald confidence limits
##  CI: confidence interval
##  * Outcomes per 100 population units
# Hypertension vs death in the non-smoker stratum (cursmoke == 0):
table(
  Framingham_1_$cathyp[Framingham_1_$cursmoke == 0],
  Framingham_1_$death[Framingham_1_$cursmoke == 0]
)[
  c("hipertensos", "normotensos"),
  c("1", "0")
] %>%
  epi.2by2(method = "cohort.count", conf.level = 0.95, outcome = "as.columns")
##              Outcome +    Outcome -      Total                 Inc risk *
## Exposed +          419          404        823     50.91 (47.44 to 54.38)
## Exposed -          343         1087       1430     23.99 (21.79 to 26.29)
## Total              762         1491       2253     33.82 (31.87 to 35.82)
## 
## Point estimates and 95% CIs:
## -------------------------------------------------------------------
## Inc risk ratio                                 2.12 (1.89, 2.38)
## Inc odds ratio                                 3.29 (2.74, 3.95)
## Attrib risk in the exposed *                   26.93 (22.86, 31.00)
## Attrib fraction in the exposed (%)            52.89 (47.19, 57.97)
## Attrib risk in the population *                9.84 (6.88, 12.79)
## Attrib fraction in the population (%)         29.08 (24.34, 33.52)
## -------------------------------------------------------------------
## Uncorrected chi2 test that OR = 1: chi2(1) = 169.194 Pr>chi2 = <0.001
## Fisher exact test that OR = 1: Pr>chi2 = <0.001
##  Wald confidence limits
##  CI: confidence interval
##  * Outcomes per 100 population units

Analicen la relación entre presencia de DIABETES y DEATH. Interpreten sus resultados. Reporten e interpreten la magnitud de la asociación (con intervalos de confianza) en caso de que exista una asociación entre estas dos variables.

# Diabetes levels before reordering.
levels(Framingham_1_$diabetes)
## [1] "0" "1"
# Put diabetes = "1" first so the exposed group becomes the first table row.
Framingham_1_ <- Framingham_1_ %>% mutate(diabetes = factor(diabetes, levels = c("1", "0")))
# Diabetes levels after reordering.
levels(Framingham_1_$diabetes)
## [1] "1" "0"
# Cross-tabulate diabetes (rows) against death (columns), asking crosstab()
# for row and column percentages, expected counts, the chi-squared test and
# Fisher's exact test.
tabla2x2chi2fisher <- crosstab(Framingham_1_$diabetes,Framingham_1_$death, prop.r = TRUE, prop.c = TRUE, chisq = TRUE, fisher = TRUE, expected =TRUE)

# Print the table together with the requested statistics.
tabla2x2chi2fisher
##    Cell Contents 
## |-------------------------|
## |                   Count | 
## |         Expected Values | 
## |             Row Percent | 
## |          Column Percent | 
## |-------------------------|
## 
## =================================================
##                           Framingham_1_$death
## Framingham_1_$diabetes         1        0   Total
## -------------------------------------------------
## 1                            94       27     121 
##                            42.3     78.7         
##                            77.7%    22.3%    2.7%
##                             6.1%     0.9%        
## -------------------------------------------------
## 0                          1456     2857    4313 
##                          1507.7   2805.3         
##                            33.8%    66.2%   97.3%
##                            93.9%    99.1%        
## -------------------------------------------------
## Total                      1550     2884    4434 
##                              35%      65%        
## =================================================
## 
## Statistics for All Table Factors
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 = 99.9      d.f. = 1      p <0.0000000000000002 
## 
## Pearson's Chi-squared test with Yates' continuity correction 
## ------------------------------------------------------------
## Chi^2 = 98      d.f. = 1      p <0.0000000000000002 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Sample estimate odds ratio: 6.83 
## 
## Alternative hypothesis: true odds ratio is not equal to 1 
## p <0.0000000000000002 
## 95% confidence interval: 4.39 11 
## 
## Alternative hypothesis: true odds ratio is less than 1 
## p = 1 
## 95%s confidence interval: % 0 10.2 
## 
## Alternative hypothesis: true odds ratio is greater than 1 
## p <0.0000000000000002 
## 95%s confidence interval: % 4.68 Inf 
## 
##         Minimum expected frequency: 42.3

¿Consideran que el sexo (SEX) se comporta como un confundidor y/o modificador de efecto en la relación entre DIABETES y DEATH? Justifiquen su respuesta.

# To judge whether sex is a confounder or an effect modifier, check whether
# it is related to the exposure (diabetes) and to the outcome (death), then
# apply Mantel-Haenszel.
# sex vs diabetes:
table(Framingham_1_$sex, Framingham_1_$diabetes)[
  c("1", "0"),
  c("1", "0")
] %>%
  epi.2by2(method = "cohort.count", conf.level = 0.95, outcome = "as.columns")
##              Outcome +    Outcome -      Total                 Inc risk *
## Exposed +           59         1885       1944        3.03 (2.32 to 3.90)
## Exposed -           62         2428       2490        2.49 (1.91 to 3.18)
## Total              121         4313       4434        2.73 (2.27 to 3.25)
## 
## Point estimates and 95% CIs:
## -------------------------------------------------------------------
## Inc risk ratio                                 1.22 (0.86, 1.73)
## Inc odds ratio                                 1.23 (0.85, 1.76)
## Attrib risk in the exposed *                   0.55 (-0.43, 1.52)
## Attrib fraction in the exposed (%)            17.96 (-16.60, 42.27)
## Attrib risk in the population *                0.24 (-0.54, 1.02)
## Attrib fraction in the population (%)         8.76 (-8.30, 23.13)
## -------------------------------------------------------------------
## Uncorrected chi2 test that OR = 1: chi2(1) = 1.222 Pr>chi2 = 0.269
## Fisher exact test that OR = 1: Pr>chi2 = 0.307
##  Wald confidence limits
##  CI: confidence interval
##  * Outcomes per 100 population units
# sex vs death:
table(Framingham_1_$sex, Framingham_1_$death)[
  c("1", "0"),
  c("1", "0")
] %>%
  epi.2by2(method = "cohort.count", conf.level = 0.95, outcome = "as.columns")
##              Outcome +    Outcome -      Total                 Inc risk *
## Exposed +          843         1101       1944     43.36 (41.15 to 45.60)
## Exposed -          707         1783       2490     28.39 (26.63 to 30.21)
## Total             1550         2884       4434     34.96 (33.55 to 36.38)
## 
## Point estimates and 95% CIs:
## -------------------------------------------------------------------
## Inc risk ratio                                 1.53 (1.41, 1.66)
## Inc odds ratio                                 1.93 (1.70, 2.19)
## Attrib risk in the exposed *                   14.97 (12.14, 17.80)
## Attrib fraction in the exposed (%)            34.52 (29.04, 39.58)
## Attrib risk in the population *                6.56 (4.30, 8.82)
## Attrib fraction in the population (%)         18.78 (15.11, 22.29)
## -------------------------------------------------------------------
## Uncorrected chi2 test that OR = 1: chi2(1) = 107.608 Pr>chi2 = <0.001
## Fisher exact test that OR = 1: Pr>chi2 = <0.001
##  Wald confidence limits
##  CI: confidence interval
##  * Outcomes per 100 population units
# Mantel-Haenszel analysis to judge whether sex confounds diabetes vs death:
# the third table dimension (sex) is the stratifying variable.
table(
  Framingham_1_$diabetes,
  Framingham_1_$death,
  Framingham_1_$sex
)[
  c("1", "0"),
  c("1", "0"),
  c("1", "0")
] %>%
  epi.2by2(method = "cohort.count", conf.level = 0.95, outcome = "as.columns")
##              Outcome +    Outcome -      Total                 Inc risk *
## Exposed +           94           27        121     77.69 (69.22 to 84.75)
## Exposed -         1456         2857       4313     33.76 (32.35 to 35.19)
## Total             1550         2884       4434     34.96 (33.55 to 36.38)
## 
## 
## Point estimates and 95% CIs:
## -------------------------------------------------------------------
## Inc risk ratio (crude)                         2.30 (2.07, 2.55)
## Inc risk ratio (M-H)                           2.25 (2.03, 2.49)
## Inc risk ratio (crude:M-H)                     1.02
## Inc odds ratio (crude)                         6.83 (4.43, 10.53)
## Inc odds ratio (M-H)                           6.81 (4.39, 10.56)
## Inc odds ratio (crude:M-H)                     1.00
## Attrib risk in the exposed (crude) *           43.93 (36.38, 51.48)
## Attrib risk in the exposed (M-H) *             43.18 (-33.42, 119.79)
## Attrib risk (crude:M-H)                        1.02
## -------------------------------------------------------------------
##  M-H test of homogeneity of IRRs: chi2(1) = 10.547 Pr>chi2 = 0.001
##  M-H test of homogeneity of ORs: chi2(1) = 0.360 Pr>chi2 = 0.548
##  Test that M-H adjusted OR = 1:  chi2(1) = 97.964 Pr>chi2 = <0.001
##  Wald confidence limits
##  M-H: Mantel-Haenszel; CI: confidence interval
##  * Outcomes per 100 population units
# To assess effect modification, analyse diabetes vs death within each
# stratum of sex.
# Stratum sex == 1 (labelled as men in the original notes):
table(
  Framingham_1_$diabetes[Framingham_1_$sex == 1],
  Framingham_1_$death[Framingham_1_$sex == 1]
)[
  c("1", "0"),
  c("1", "0")
] %>%
  epi.2by2(method = "cohort.count", conf.level = 0.95, outcome = "as.columns")
##              Outcome +    Outcome -      Total                 Inc risk *
## Exposed +           48           11         59     81.36 (69.09 to 90.31)
## Exposed -          795         1090       1885     42.18 (39.93 to 44.44)
## Total              843         1101       1944     43.36 (41.15 to 45.60)
## 
## Point estimates and 95% CIs:
## -------------------------------------------------------------------
## Inc risk ratio                                 1.93 (1.69, 2.20)
## Inc odds ratio                                 5.98 (3.09, 11.59)
## Attrib risk in the exposed *                   39.18 (29.00, 49.37)
## Attrib fraction in the exposed (%)            48.16 (40.78, 54.62)
## Attrib risk in the population *                1.19 (-1.95, 4.32)
## Attrib fraction in the population (%)         2.74 (1.74, 3.73)
## -------------------------------------------------------------------
## Uncorrected chi2 test that OR = 1: chi2(1) = 35.760 Pr>chi2 = <0.001
## Fisher exact test that OR = 1: Pr>chi2 = <0.001
##  Wald confidence limits
##  CI: confidence interval
##  * Outcomes per 100 population units
# Stratum: women (sex == 0). Same diabetes-by-death 2x2 analysis, with level
# "1" placed first on both rows (diabetes) and columns (death).
Framingham_1_ %>%
  filter(sex == 0) %>%
  with(table(diabetes, death)[c("1", "0"), c("1", "0")]) %>%
  epi.2by2(method = "cohort.count", conf.level = 0.95, outcome = "as.columns")
##              Outcome +    Outcome -      Total                 Inc risk *
## Exposed +           46           16         62     74.19 (61.50 to 84.47)
## Exposed -          661         1767       2428     27.22 (25.46 to 29.04)
## Total              707         1783       2490     28.39 (26.63 to 30.21)
## 
## Point estimates and 95% CIs:
## -------------------------------------------------------------------
## Inc risk ratio                                 2.73 (2.32, 3.20)
## Inc odds ratio                                 7.69 (4.32, 13.67)
## Attrib risk in the exposed *                   46.97 (35.93, 58.00)
## Attrib fraction in the exposed (%)            63.31 (56.92, 68.75)
## Attrib risk in the population *                1.17 (-1.33, 3.67)
## Attrib fraction in the population (%)         4.12 (2.71, 5.50)
## -------------------------------------------------------------------
## Uncorrected chi2 test that OR = 1: chi2(1) = 65.600 Pr>chi2 = <0.001
## Fisher exact test that OR = 1: Pr>chi2 = <0.001
##  Wald confidence limits
##  CI: confidence interval
##  * Outcomes per 100 population units

Generen la variable OBESE como categorías de BMI: normopeso BMI<= 25; sobrepeso BMI >25 y <=30; y obesidad BMI > 30. Pista: Usen case_when para crear los niveles

# Derive the three-level BMI category `obese` from bmi. The conditions are
# mutually exclusive; rows with missing bmi match none of them and stay NA.
Framingham_1_ <- Framingham_1_ %>%
  mutate(
    obese = as.factor(
      case_when(
        bmi <= 25 ~ "normopeso",
        bmi > 25 & bmi <= 30 ~ "sobrepeso",
        bmi > 30 ~ "obesidad"
      )
    )
  )

Comparen la SYSBP (variable continua) de los pacientes en las diferentes categorías de OBESE (usted en el punto 8 generó las variables de obesidad). Por favor explique qué test/tests eligió y fundamente el porqué. Asimismo, exprese cuál fue la media de tensión arterial sistólica (variable continua) en cada grupo de OBESE, y entre qué grupos ha habido diferencia significativa en relación con dichas medias.

# Normality check of sysbp within each category of obese.
# The categories are iterated instead of repeating one call per level, every
# plot gets an explicit title (the default title is the deparsed subsetting
# expression), and %in% is used for the mask so rows whose obese value is NA
# are excluded instead of contributing NA entries.
grupos_obese <- c("normopeso", "sobrepeso", "obesidad")

# Histograms, one per category.
for (grupo in grupos_obese) {
  sysbp_grupo <- Framingham_1_$sysbp[Framingham_1_$obese %in% grupo]
  hist(
    sysbp_grupo,
    main = paste("Histograma de sysbp:", grupo),
    xlab = "sysbp"
  )
}

# Normal Q-Q plots with their reference line, one per category.
for (grupo in grupos_obese) {
  sysbp_grupo <- Framingham_1_$sysbp[Framingham_1_$obese %in% grupo]
  qqnorm(sysbp_grupo, main = paste("Q-Q normal de sysbp:", grupo))
  qqline(sysbp_grupo)
}

# Descriptive statistics of sysbp for each category of obese.
describeBy(Framingham_1_$sysbp, group = Framingham_1_$obese)
## 
##  Descriptive statistics by group 
## group: normopeso
##    vars    n mean   sd median trimmed  mad  min max range skew kurtosis   se
## X1    1 1993  127 20.4    123     124 16.3 83.5 244   160  1.4     3.15 0.46
## ------------------------------------------------------------ 
## group: obesidad
##    vars   n mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 574  145 24.6    141     143 21.1  93 295   202 1.09     2.67 1.03
## ------------------------------------------------------------ 
## group: sobrepeso
##    vars    n mean   sd median trimmed  mad  min max range skew kurtosis  se
## X1    1 1848  136 21.5    132     134 19.3 83.5 243   160 0.98     1.11 0.5
# Shapiro-Wilk normality test of sysbp within the "normopeso" category.
shapiro.test(
  x = Framingham_1_$sysbp[Framingham_1_$obese == "normopeso"]
)
## 
##  Shapiro-Wilk normality test
## 
## data:  Framingham_1_$sysbp[Framingham_1_$obese == "normopeso"]
## W = 0.9, p-value <0.0000000000000002
# Shapiro-Wilk normality test of sysbp within the "sobrepeso" category.
shapiro.test(
  x = Framingham_1_$sysbp[Framingham_1_$obese == "sobrepeso"]
)
## 
##  Shapiro-Wilk normality test
## 
## data:  Framingham_1_$sysbp[Framingham_1_$obese == "sobrepeso"]
## W = 0.9, p-value <0.0000000000000002
# Shapiro-Wilk normality test of sysbp within the "obesidad" category.
shapiro.test(
  x = Framingham_1_$sysbp[Framingham_1_$obese == "obesidad"]
)
## 
##  Shapiro-Wilk normality test
## 
## data:  Framingham_1_$sysbp[Framingham_1_$obese == "obesidad"]
## W = 0.9, p-value = 0.0000000000001
# Bartlett test for equality of the sysbp variances across the obese groups.
bartlett.test(
  x = Framingham_1_$sysbp,
  g = Framingham_1_$obese
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Framingham_1_$sysbp and Framingham_1_$obese
## Bartlett's K-squared = 34, df = 2, p-value = 0.00000004
# Although the distributions look roughly normal, homoscedasticity (a required
# condition for ANOVA) does not hold, so Kruskal-Wallis is used to compare the
# more-than-two groups.
kruskal.test(Framingham_1_$sysbp ~ Framingham_1_$obese)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Framingham_1_$sysbp by Framingham_1_$obese
## Kruskal-Wallis chi-squared = 418, df = 2, p-value <0.0000000000000002
# Pairwise Wilcoxon rank-sum comparisons between the obese groups, with
# Bonferroni adjustment of the p-values, to see which groups differ.
pairwise.wilcox.test(
  x = Framingham_1_$sysbp,
  g = Framingham_1_$obese,
  p.adjust.method = "bonferroni"
)
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  Framingham_1_$sysbp and Framingham_1_$obese 
## 
##           normopeso           obesidad           
## obesidad  <0.0000000000000002 -                  
## sobrepeso <0.0000000000000002 <0.0000000000000002
## 
## P value adjustment method: bonferroni

Elaboren sus Tablas de Resultados con los testeos que realizaron, incluyan cuando corresponda, medida de asociación, intervalo de confianza y/o valor de p. Si encontraron algún modificador de efecto pueden reportar por separado los estratos.

# Table 1: participant characteristics stratified by death.
# death is the stratifying variable, so it is not listed among the summarised
# variables; including it only adds a degenerate 0% / 100% row with a
# meaningless p-value.
catvars <- c(
  "sex", "obese", "cathyp", "cursmoke",
  "diabetes", "prevstrk", "prevhyp", "prevchd"
)
vars <- c(
  "sex", "obese", "cathyp", "cursmoke", "diabetes",
  "sysbp", "cigpday", "bmi", "age",
  "prevstrk", "prevhyp", "prevchd"
)
tabla_1 <- CreateTableOne(
  vars = vars,
  strata = "death",
  factorVars = catvars,
  data = Framingham_1_
)
# nonnormal only applies to continuous variables (reported as median [IQR] and
# compared with a rank-based test). cursmoke is declared categorical above, so
# listing it here had no effect.
# NOTE(review): cigpday is assumed to be the skewed smoking variable that was
# intended; add further continuous variables here if they should also be
# treated as non-normal.
print(tabla_1, nonnormal = c("cigpday"))
##                           Stratified by death
##                            1              0               p      test
##   n                          1550           2884                     
##   sex = 1 (%)                 843 (54.4)    1101 ( 38.2)  <0.001     
##   obese (%)                                               <0.001     
##      normopeso                589 (38.3)    1404 ( 48.8)             
##      obesidad                 257 (16.7)     317 ( 11.0)             
##      sobrepeso                691 (45.0)    1157 ( 40.2)             
##   cathyp = normotensos (%)    819 (52.8)    2234 ( 77.5)  <0.001     
##   cursmoke = 1 (%)            788 (50.8)    1393 ( 48.3)   0.114     
##   death = 0 (%)                 0 ( 0.0)    2884 (100.0)  <0.001     
##   diabetes = 0 (%)           1456 (93.9)    2857 ( 99.1)  <0.001     
##   sysbp (mean (SD))        142.18 (25.82) 127.92 (18.54)  <0.001     
##   cigpday (mean (SD))        9.95 (12.77)   8.44 (11.42)  <0.001     
##   bmi (mean (SD))           26.41 (4.38)   25.54 (3.91)   <0.001     
##   age (mean (SD))           54.92 (8.22)   47.24 (7.67)   <0.001     
##   prevstrk = 1 (%)             25 ( 1.6)       7 (  0.2)  <0.001     
##   prevhyp = 1 (%)             758 (48.9)     672 ( 23.3)  <0.001     
##   prevchd = 1 (%)             144 ( 9.3)      50 (  1.7)  <0.001
# Format Table 1 with kableone() for the report.
tabla_1 %>%
  kableone()
1 0 p test
n 1550 2884
sex = 1 (%) 843 (54.4) 1101 ( 38.2) <0.001
obese (%) <0.001
normopeso 589 (38.3) 1404 ( 48.8)
obesidad 257 (16.7) 317 ( 11.0)
sobrepeso 691 (45.0) 1157 ( 40.2)
cathyp = normotensos (%) 819 (52.8) 2234 ( 77.5) <0.001
cursmoke = 1 (%) 788 (50.8) 1393 ( 48.3) 0.114
death = 0 (%) 0 ( 0.0) 2884 (100.0) <0.001
diabetes = 0 (%) 1456 (93.9) 2857 ( 99.1) <0.001
sysbp (mean (SD)) 142.18 (25.82) 127.92 (18.54) <0.001
cigpday (mean (SD)) 9.95 (12.77) 8.44 (11.42) <0.001
bmi (mean (SD)) 26.41 (4.38) 25.54 (3.91) <0.001
age (mean (SD)) 54.92 (8.22) 47.24 (7.67) <0.001
prevstrk = 1 (%) 25 ( 1.6) 7 ( 0.2) <0.001
prevhyp = 1 (%) 758 (48.9) 672 ( 23.3) <0.001
prevchd = 1 (%) 144 ( 9.3) 50 ( 1.7) <0.001

Grafiquen por separado la distribución de las variables SEX, SYSBP, CURSMOKE, BMI, DIABETES y las nuevas variables CATHYP y OBESE, en función de la variable DEATH. Relacionen lo que observan en los gráficos con sus resultados previos.

# Boxplot of systolic blood pressure (sysbp) by death.
ggplot(
  data = Framingham_1_,
  mapping = aes(x = death, y = sysbp, fill = death)
) +
  geom_boxplot(alpha = 0.5) +
  labs(
    title = "Presion sistolica en fallecidos y no fallecidos",
    x = "Fallecidos=1 No fallecidos=0",
    y = "Presion sistolica"
  ) +
  theme_minimal()

# Boxplot of body mass index (bmi) by death.
ggplot(
  data = Framingham_1_,
  mapping = aes(x = death, y = bmi, fill = death)
) +
  geom_boxplot(alpha = 0.5) +
  labs(
    title = "Indice de masa corporal en fallecidos y no fallecidos",
    x = "Fallecidos=1 No fallecidos=0",
    y = "Indice de masa corporal"
  ) +
  theme_minimal()
## Warning: Removed 19 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Bar chart: number of participants by death, split by sex.
ggplot(Framingham_1_, aes(x = factor(death), fill = factor(sex))) +
  geom_bar(position = "dodge") +
  # Axis labels are keyed by level so they cannot end up swapped if the level
  # order of death changes.
  scale_x_discrete(labels = c("1" = "Fallecido", "0" = "No Fallecido")) +
  labs(
    title = "Distribución por Sexo y Mortalidad",
    x = "Mortalidad",
    y = "Número de Personas",
    # Fixed typo in the legend title ("Maculino").
    fill = "Sexo\n(0 = Femenino, 1 = Masculino)"
  ) +
  theme_minimal()

# Bar chart: number of participants by death, split by current smoking
# status (cursmoke).
ggplot(Framingham_1_, aes(x = factor(death), fill = factor(cursmoke))) +
  geom_bar(position = "dodge") +
  # Axis labels are keyed by level so they cannot end up swapped if the level
  # order of death changes.
  scale_x_discrete(labels = c("1" = "Fallecido", "0" = "No Fallecido")) +
  labs(
    title = "Distribución por Fumador segun Mortalidad",
    x = "Mortalidad",
    y = "Número de Personas",
    # The legend describes cursmoke; the previous title said "Sexo".
    fill = "Fumador actual\n(0 = No fumador, 1 = Fumador)"
  ) +
  theme_minimal()

# Bar chart of death counts restricted to the rows with diabetes == "1".
Framingham_1_diabetes_si <- filter(Framingham_1_, diabetes == "1")
ggplot(
  Framingham_1_diabetes_si,
  aes(x = factor(death), fill = factor(diabetes))
) +
  geom_bar(position = "dodge") +
  scale_fill_manual(values = c("skyblue", "orange")) +
  labs(
    title = "Diabetes según Mortalidad",
    caption = "Muertes",
    x = "Fallecidos = 1, No fallecidos = 0",
    y = "Número"
  ) +
  theme_minimal()

# Bar chart: number of participants by death, split by hypertension category
# (cathyp).
ggplot(Framingham_1_, aes(x = factor(death), fill = factor(cathyp))) +
  geom_bar(position = "dodge") +
  scale_fill_manual(values = c("orange", "green")) +
  # Axis labels are keyed by level so they cannot end up swapped if the level
  # order of death changes.
  scale_x_discrete(labels = c("1" = "Fallecido", "0" = "No Fallecido")) +
  labs(
    title = "Distribución de hipertensos según Mortalidad",
    x = "Mortalidad",
    y = "Número de Personas",
    # The legend describes cathyp, whose levels are text labels shown in the
    # legend itself; the previous title said "Sexo" and gave a 0/1 key.
    fill = "Categoría de presión arterial"
  ) +
  theme_minimal()

# Bar chart: number of participants by death, split by BMI category (obese).
# Rows without an obese category are dropped first.
Framingham_1_clean <- Framingham_1_ %>%
  filter(!is.na(obese))
ggplot(Framingham_1_clean, aes(x = factor(death), fill = factor(obese))) +
  geom_bar(position = "dodge") +
  # Colours are keyed by level so each category keeps its colour regardless of
  # the level order of obese.
  scale_fill_manual(
    values = c(normopeso = "skyblue", obesidad = "pink", sobrepeso = "grey")
  ) +
  # Axis labels are keyed by level so they cannot end up swapped if the level
  # order of death changes.
  scale_x_discrete(labels = c("1" = "Fallecido", "0" = "No Fallecido")) +
  labs(
    title = "Distribución de obesos según Mortalidad",
    x = "Mortalidad",
    y = "Número de Personas",
    # The legend describes obese; the previous title said "Sexo" and listed
    # the categories in an order that did not match the legend entries.
    fill = "Categoría de IMC"
  ) +
  theme_minimal()

Escriban una breve conclusión sobre factores de riesgo de muerte asociada a Hipertensión Arterial, Diabetes y Obesidad.

#La hipertensión arterial es un factor de riesgo de muerte, dado que tanto la presión sistólica como el antecedente de hipertensión arterial se asocian a una mayor mortalidad. En el caso de la presión sistólica, la media en pacientes fallecidos fue de 142 y en no fallecidos fue de 128, siendo esta diferencia estadísticamente significativa. La diabetes también se encontró asociada a la muerte en forma estadísticamente significativa, así como la presencia de obesidad.