library(table1); library(ggplot2); library(ggthemes)
## Warning: package 'table1' was built under R version 4.0.3
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
## Warning: package 'ggplot2' was built under R version 4.0.5
fmh = read.csv("D:/Downloads/tailieu/R course/Seminar TDT 2022/Tai lieu/Data set/Framingham dataset.csv")
fmh$gender = ifelse(fmh$sex == 1, "Male", "Female")
dat = fmh[, c('stroke', 'age', 'gender', 'sysbp', 'tot.chol', 'bmi', 'smoker', 'diabetes')]
table1(~ . | stroke, data=dat)
## Warning in table1.formula(~. | stroke, data = dat): Terms to the right of '|' in
## formula 'x' define table columns and are expected to be factors with meaningful
## labels.
| 0 (N=10566) |
1 (N=1061) |
Overall (N=11627) |
|
|---|---|---|---|
| stroke | |||
| Mean (SD) | 0 (0) | 1.00 (0) | 0.0913 (0.288) |
| Median [Min, Max] | 0 [0, 0] | 1.00 [1.00, 1.00] | 0 [0, 1.00] |
| age | |||
| Mean (SD) | 54.2 (9.45) | 60.4 (8.90) | 54.8 (9.56) |
| Median [Min, Max] | 54.0 [32.0, 81.0] | 61.0 [34.0, 81.0] | 54.0 [32.0, 81.0] |
| gender | |||
| Female | 6032 (57.1%) | 573 (54.0%) | 6605 (56.8%) |
| Male | 4534 (42.9%) | 488 (46.0%) | 5022 (43.2%) |
| sysbp | |||
| Mean (SD) | 135 (21.8) | 151 (26.9) | 136 (22.8) |
| Median [Min, Max] | 131 [83.5, 244] | 147 [94.0, 295] | 132 [83.5, 295] |
| tot.chol | |||
| Mean (SD) | 241 (44.9) | 243 (49.8) | 241 (45.4) |
| Median [Min, Max] | 238 [113, 638] | 240 [107, 696] | 238 [107, 696] |
| Missing | 376 (3.6%) | 33 (3.1%) | 409 (3.5%) |
| bmi | |||
| Mean (SD) | 25.8 (4.03) | 26.8 (4.68) | 25.9 (4.10) |
| Median [Min, Max] | 25.4 [14.4, 56.8] | 26.4 [16.9, 55.3] | 25.5 [14.4, 56.8] |
| Missing | 42 (0.4%) | 10 (0.9%) | 52 (0.4%) |
| smoker | |||
| Mean (SD) | 0.434 (0.496) | 0.418 (0.494) | 0.433 (0.495) |
| Median [Min, Max] | 0 [0, 1.00] | 0 [0, 1.00] | 0 [0, 1.00] |
| diabetes | |||
| Mean (SD) | 0.0380 (0.191) | 0.121 (0.326) | 0.0456 (0.209) |
| Median [Min, Max] | 0 [0, 1.00] | 0 [0, 1.00] | 0 [0, 1.00] |
attach(dat)
vars = data.frame(age, gender, sysbp, tot.chol, bmi, smoker, diabetes)
factor = c('Age', 'Male', 'Systolic', 'Total Cholesterol', 'BMI', 'Smoker', 'Diabetes')
odds.ratio = numeric(length(vars))
lower.or = numeric(length(vars))
upper.or = numeric(length(vars))
for (i in 1:length(vars)) {
m = glm(stroke ~ vars[,i], family=binomial)
odds.ratio[i] = exp(summary(m)[["coefficients"]][2])
lower.or[i] = exp(summary(m)[["coefficients"]][2] - 1.96*summary(m)[["coefficients"]][4])
upper.or[i] = exp(summary(m)[["coefficients"]][2] + 1.96*summary(m)[["coefficients"]][4])
}
effect = data.frame(factor, odds.ratio, lower.or, upper.or)
effect
## factor odds.ratio lower.or upper.or
## 1 Age 1.0711984 1.0637608 1.078688
## 2 Male 1.1330394 0.9981748 1.286126
## 3 Systolic 1.0266984 1.0241559 1.029247
## 4 Total Cholesterol 1.0008544 0.9994589 1.002252
## 5 BMI 1.0541061 1.0392176 1.069208
## 6 Smoker 0.9387118 0.8260105 1.066790
## 7 Diabetes 3.4687015 2.8119149 4.278896
ggplot(effect, aes(x=factor, y=odds.ratio)) + geom_point(color="black", size=2) + geom_errorbar(aes(ymin=lower.or, ymax=upper.or), width=0.16, color="black", size=0.6) + theme(legend.position="none") + coord_flip() +geom_hline(yintercept = 1, linetype="dashed", size=0.5) + scale_y_continuous(breaks = seq(0.8, 4.4, 0.2)) + ylab("Odds Ratio") + xlab("Factor") + theme_igray()
t = epiDisplay::logistic.display(glm(fmh$stroke ~ fmh$age + fmh$gender + fmh$sysbp + fmh$tot.chol + fmh$bmi + factor(fmh$smoker) + factor(fmh$diabetes), family=binomial))
t <- plyr::ldply (t, data.frame)
colnames(t)[2] = "OddsRatio"
t[1,1] = "Age"
t[2,1] = "Male"
t[3,1] = "Systolic"
t[4,1] = "Total Cholesterol"
t[5,1] = "BMI"
t[6,1] = "Smoker"
t[7,1] = "Diabetes"
t
## .id OddsRatio lower95ci upper95ci Pr...Z..
## 1 Age 1.0570879 1.0486064 1.065638 1.415878e-41
## 2 Male 1.1702586 1.0171387 1.346429 2.798646e-02
## 3 Systolic 1.0192726 1.0163326 1.022221 2.267641e-38
## 4 Total Cholesterol 0.9987897 0.9972766 1.000305 1.174701e-01
## 5 BMI 1.0244120 1.0083778 1.040701 2.731204e-03
## 6 Smoker 1.4724605 1.2730545 1.703101 1.871578e-07
## 7 Diabetes 1.9611207 1.5546215 2.473910 1.323606e-08
ggplot(data= t, aes(y= .id, x= OddsRatio, label= .id)) + geom_point(size= 2, shape= 19) + geom_errorbarh (aes(xmin= lower95ci, xmax= upper95ci), height=.2, size = 1) + coord_fixed (ratio= 0.3) + geom_vline(xintercept=1, linetype='longdash')
ggplot(t, aes(x=.id, y=OddsRatio)) + geom_point(color="black", size=2) + geom_errorbar(aes(ymin=lower95ci, ymax=upper95ci), width=0.16, color="black", size=0.6) + theme(legend.position="none") + coord_flip() +geom_hline(yintercept = 1, linetype="dashed", size=0.5) + scale_y_continuous(breaks = seq(0.8, 2.6, 0.2)) + ylab("Odds Ratio") + xlab("Factor") + theme_igray()