library(haven)
df <- read_sas("/Users/thien/Desktop/R-dir/Depressive Disorder project/nhanes.sas7bdat")
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.5.0
## ✔ readr 2.1.4 ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Filter the dataframe
df21 <- df %>%
filter(DPQ010 != 7 & DPQ010 != 9 & !is.na(DPQ010))
df22 <- df21 %>%
filter(DPQ020 != 7 & DPQ020 != 9 & !is.na(DPQ020))
df23 <- df22 %>%
filter(DPQ030 != 7 & DPQ030 != 9 & !is.na(DPQ030))
df24 <- df23 %>%
filter(DPQ040 != 7 & DPQ040 != 9 & !is.na(DPQ040))
df25 <- df24 %>%
filter(DPQ050 != 7 & DPQ050 != 9 & !is.na(DPQ050))
df26 <- df25 %>%
filter(DPQ060 != 7 & DPQ060 != 9 & !is.na(DPQ060))
df27 <- df26 %>%
filter(DPQ070 != 7 & DPQ070 != 9 & !is.na(DPQ070))
df28 <- df27 %>%
filter(DPQ080 != 7 & DPQ080 != 9 & !is.na(DPQ080))
df29 <- df28 %>%
filter(DPQ090 != 7 & DPQ090 != 9 & !is.na(DPQ090))
# Create depression variable:
depress <- df29
depress$dp = depress$DPQ010 + depress$DPQ020 + depress$DPQ030 + depress$DPQ040 + depress$DPQ050 + depress$DPQ060 + depress$DPQ070 + depress$DPQ080 + depress$DPQ090
# Create the new variable 'dp10'
depress <- depress %>%
mutate(dp10 = ifelse(dp <= 9, 0, 1))
table(depress$dp10)
##
## 0 1
## 4861 511
# 3.1 Create SBP and DBP
depress$SBP = (depress$BPXSY1 + depress$BPXSY2) / 2
depress$DBP =(depress$BPXDI1 + depress$BPXDI2) / 2
# 3.2 Create the variable ‘hyper’
hypertension <- depress %>%
mutate(hyper = case_when(
SBP >= 140 | DBP >= 90 | BPQ040A == 1 | BPQ050A == 1 ~ 1,
TRUE ~ 0))
table(hypertension$hyper)
##
## 0 1
## 3458 1914
# 3.3 Create the variable ‘dyslip’ using case_when
lipid <- hypertension %>%
mutate(dyslip = case_when(
LBXTC >= 200 | LBXTR >= 150 | LBDLDL >= 100 | LBDHDD <= 40 | BPQ090D == 1 ~ 1,
TRUE ~ 0
))
table(lipid$dyslip)
##
## 0 1
## 1681 3691
# 3.4 Create the ‘db’ variable using case_when
diab <- lipid %>%
mutate(db = case_when(
LBXGLU >= 126 | LBXGLT >= 200 | LBXGH >= 6.5 | DIQ070 == 1 ~ 1,
TRUE ~ 0
))
table(diab$db)
##
## 0 1
## 4542 830
df3 = subset(diab, select = c(RIDAGEYR, RIAGENDR, RIDRETH3, DMDEDUC2, DMDMARTL,
SBP, DBP, BMXBMI, INDFMIN2, INDFMPIR,
LBXSBU, LBXSCR, LBXSASSI, LBXSATSI,
LBXAPB, LBDHDD, LBDLDL, LBXTR, LBXTC,
LBXGLU, LBXGLT, ALQ101, SMQ020,
PAQ665, PAQ650, LBXCOT, LBXHCT, URXCOTT, URXHCTT,
db, dyslip, hyper, dp10))
# Rename all variables in one code
df4 <- df3 %>% rename(age = RIDAGEYR, sex = RIAGENDR, ethnic = RIDRETH3,
educ = DMDEDUC2, masts = DMDMARTL, BMI = BMXBMI,
finc = INDFMIN2, PIR = INDFMPIR,
BUN = LBXSBU, crea = LBXSCR, AST = LBXSASSI, ALT = LBXSATSI,
apoB = LBXAPB, HDL = LBDHDD, LDL = LBDLDL,
TG = LBXTR, TC = LBXTC, Glu = LBXGLU, tolGlu = LBXGLT,
sCOT = LBXCOT, sHCOT = LBXHCT, uCOT = URXCOTT, uHCOT = URXHCTT)
# Filter out values ‘7, 77’ and ‘9, 99’ from categorical variables column
## Those values are unknown and confused values
df5 <- df4 %>%
filter(educ %in% c('1', '2', '3', '4', '5'),
masts %in% c('1', '2', '3', '4', '5', '6'),
ALQ101 %in% c('1', '2'),
SMQ020 %in% c('1', '2'))
# Change to factor
df_new = df5
df_new$dp10 = as.factor(df_new$dp10)
df_new$sex = as.factor(df_new$sex)
df_new$ethnic = as.factor(df_new$ethnic)
df_new$educ = as.factor(df_new$educ)
df_new$masts = as.factor(df_new$masts)
df_new$ALQ101 = as.factor(df_new$ALQ101)
df_new$SMQ020 = as.factor(df_new$SMQ020)
df_new$PAQ650 = as.factor(df_new$PAQ650)
df_new$PAQ665 = as.factor(df_new$PAQ665)
df_new$hyper = as.factor(df_new$hyper)
df_new$db = as.factor(df_new$db)
df_new$dyslip = as.factor(df_new$dyslip)
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
table1(~ age + sex + ethnic + educ + masts + SBP + DBP + BMI + finc + PIR +
BUN + crea + AST + ALT + apoB + HDL + LDL + TG + TC +
Glu + ALQ101 + SMQ020 + PAQ650 + PAQ665 +
sCOT + sHCOT + uCOT + uHCOT + db + dyslip + hyper | dp10, data = df_new)
| 0 (N=4553) |
1 (N=487) |
Overall (N=5040) |
|
|---|---|---|---|
| Age in years at screening | |||
| Mean (SD) | 48.9 (17.6) | 51.6 (16.4) | 49.2 (17.5) |
| Median [Min, Max] | 48.0 [20.0, 80.0] | 54.0 [20.0, 80.0] | 49.0 [20.0, 80.0] |
| sex | |||
| 1 | 2275 (50.0%) | 158 (32.4%) | 2433 (48.3%) |
| 2 | 2278 (50.0%) | 329 (67.6%) | 2607 (51.7%) |
| ethnic | |||
| 1 | 607 (13.3%) | 67 (13.8%) | 674 (13.4%) |
| 2 | 392 (8.6%) | 57 (11.7%) | 449 (8.9%) |
| 3 | 2006 (44.1%) | 216 (44.4%) | 2222 (44.1%) |
| 4 | 905 (19.9%) | 105 (21.6%) | 1010 (20.0%) |
| 6 | 509 (11.2%) | 17 (3.5%) | 526 (10.4%) |
| 7 | 134 (2.9%) | 25 (5.1%) | 159 (3.2%) |
| educ | |||
| 1 | 292 (6.4%) | 64 (13.1%) | 356 (7.1%) |
| 2 | 575 (12.6%) | 103 (21.1%) | 678 (13.5%) |
| 3 | 1009 (22.2%) | 117 (24.0%) | 1126 (22.3%) |
| 4 | 1442 (31.7%) | 148 (30.4%) | 1590 (31.5%) |
| 5 | 1235 (27.1%) | 55 (11.3%) | 1290 (25.6%) |
| masts | |||
| 1 | 2443 (53.7%) | 187 (38.4%) | 2630 (52.2%) |
| 2 | 309 (6.8%) | 57 (11.7%) | 366 (7.3%) |
| 3 | 484 (10.6%) | 98 (20.1%) | 582 (11.5%) |
| 4 | 123 (2.7%) | 27 (5.5%) | 150 (3.0%) |
| 5 | 872 (19.2%) | 82 (16.8%) | 954 (18.9%) |
| 6 | 322 (7.1%) | 36 (7.4%) | 358 (7.1%) |
| Systolic: Blood pres (1st rdg) mm Hg | |||
| Mean (SD) | 123 (17.2) | 125 (19.1) | 123 (17.4) |
| Median [Min, Max] | 120 [70.0, 229] | 122 [66.0, 216] | 120 [66.0, 229] |
| Missing | 412 (9.0%) | 55 (11.3%) | 467 (9.3%) |
| Diastolic: Blood pres (1st rdg) mm Hg | |||
| Mean (SD) | 69.4 (12.5) | 69.7 (12.4) | 69.4 (12.5) |
| Median [Min, Max] | 70.0 [0, 119] | 71.0 [0, 104] | 70.0 [0, 119] |
| Missing | 412 (9.0%) | 55 (11.3%) | 467 (9.3%) |
| Body Mass Index (kg/m**2) | |||
| Mean (SD) | 28.9 (6.87) | 31.9 (9.32) | 29.2 (7.19) |
| Median [Min, Max] | 27.7 [14.1, 70.1] | 30.8 [15.2, 82.9] | 27.9 [14.1, 82.9] |
| Missing | 41 (0.9%) | 5 (1.0%) | 46 (0.9%) |
| Annual family income | |||
| Mean (SD) | 10.8 (13.4) | 8.97 (15.0) | 10.6 (13.6) |
| Median [Min, Max] | 8.00 [1.00, 99.0] | 6.00 [1.00, 99.0] | 8.00 [1.00, 99.0] |
| Missing | 41 (0.9%) | 8 (1.6%) | 49 (1.0%) |
| Ratio of family income to poverty | |||
| Mean (SD) | 2.62 (1.66) | 1.74 (1.33) | 2.53 (1.65) |
| Median [Min, Max] | 2.26 [0, 5.00] | 1.27 [0, 5.00] | 2.15 [0, 5.00] |
| Missing | 322 (7.1%) | 42 (8.6%) | 364 (7.2%) |
| Blood urea nitrogen (mg/dL) | |||
| Mean (SD) | 13.4 (5.73) | 13.6 (9.17) | 13.4 (6.15) |
| Median [Min, Max] | 12.0 [1.00, 73.0] | 12.0 [2.00, 95.0] | 12.0 [1.00, 95.0] |
| Missing | 170 (3.7%) | 20 (4.1%) | 190 (3.8%) |
| Creatinine (mg/dL) | |||
| Mean (SD) | 0.912 (0.441) | 0.995 (1.09) | 0.920 (0.539) |
| Median [Min, Max] | 0.860 [0.300, 16.6] | 0.820 [0.360, 17.4] | 0.860 [0.300, 17.4] |
| Missing | 170 (3.7%) | 20 (4.1%) | 190 (3.8%) |
| Aspartate aminotransferase AST (U/L) | |||
| Mean (SD) | 25.3 (18.6) | 26.6 (21.7) | 25.4 (18.9) |
| Median [Min, Max] | 22.0 [9.00, 882] | 22.0 [11.0, 294] | 22.0 [9.00, 882] |
| Missing | 172 (3.8%) | 20 (4.1%) | 192 (3.8%) |
| Alanine aminotransferase ALT (U/L) | |||
| Mean (SD) | 25.0 (18.9) | 25.7 (21.5) | 25.1 (19.1) |
| Median [Min, Max] | 20.0 [6.00, 536] | 20.0 [7.00, 300] | 20.0 [6.00, 536] |
| Missing | 172 (3.8%) | 20 (4.1%) | 192 (3.8%) |
| Apolipoprotein (B) (mg/dL) | |||
| Mean (SD) | 89.8 (24.6) | 95.3 (26.5) | 90.3 (24.8) |
| Median [Min, Max] | 88.0 [24.0, 228] | 94.0 [20.0, 234] | 89.0 [20.0, 234] |
| Missing | 2447 (53.7%) | 273 (56.1%) | 2720 (54.0%) |
| Direct HDL-Cholesterol (mg/dL) | |||
| Mean (SD) | 52.9 (16.2) | 51.2 (15.5) | 52.8 (16.1) |
| Median [Min, Max] | 50.0 [10.0, 173] | 48.0 [22.0, 117] | 50.0 [10.0, 173] |
| Missing | 159 (3.5%) | 17 (3.5%) | 176 (3.5%) |
| LDL-cholesterol (mg/dL) | |||
| Mean (SD) | 111 (34.6) | 113 (37.1) | 111 (34.9) |
| Median [Min, Max] | 108 [14.0, 375] | 112 [24.0, 288] | 108 [14.0, 375] |
| Missing | 2480 (54.5%) | 276 (56.7%) | 2756 (54.7%) |
| Triglyceride (mg/dL) | |||
| Mean (SD) | 120 (130) | 142 (98.7) | 122 (128) |
| Median [Min, Max] | 94.0 [14.0, 4230] | 121 [21.0, 1000] | 96.0 [14.0, 4230] |
| Missing | 2446 (53.7%) | 273 (56.1%) | 2719 (53.9%) |
| Total Cholesterol( mg/dL) | |||
| Mean (SD) | 189 (41.9) | 192 (41.3) | 189 (41.9) |
| Median [Min, Max] | 185 [82.0, 813] | 189 [85.0, 380] | 186 [82.0, 813] |
| Missing | 159 (3.5%) | 17 (3.5%) | 176 (3.5%) |
| Fasting Glucose (mg/dL) | |||
| Mean (SD) | 107 (32.6) | 117 (43.8) | 107 (33.9) |
| Median [Min, Max] | 99.0 [51.0, 405] | 102 [67.0, 375] | 99.0 [51.0, 405] |
| Missing | 2436 (53.5%) | 269 (55.2%) | 2705 (53.7%) |
| ALQ101 | |||
| 1 | 3293 (72.3%) | 350 (71.9%) | 3643 (72.3%) |
| 2 | 1260 (27.7%) | 137 (28.1%) | 1397 (27.7%) |
| SMQ020 | |||
| 1 | 1924 (42.3%) | 280 (57.5%) | 2204 (43.7%) |
| 2 | 2629 (57.7%) | 207 (42.5%) | 2836 (56.3%) |
| PAQ650 | |||
| 1 | 1090 (23.9%) | 48 (9.9%) | 1138 (22.6%) |
| 2 | 3463 (76.1%) | 439 (90.1%) | 3902 (77.4%) |
| PAQ665 | |||
| 1 | 2009 (44.1%) | 123 (25.3%) | 2132 (42.3%) |
| 2 | 2544 (55.9%) | 364 (74.7%) | 2908 (57.7%) |
| Cotinine, Serum (ng/mL) | |||
| Mean (SD) | 56.7 (131) | 104 (163) | 61.3 (135) |
| Median [Min, Max] | 0.0330 [0.0110, 1820] | 0.233 [0.0110, 1030] | 0.0360 [0.0110, 1820] |
| Missing | 157 (3.4%) | 18 (3.7%) | 175 (3.5%) |
| Hematocrit (%) | |||
| Mean (SD) | 22.6 (60.9) | 45.4 (78.0) | 24.8 (63.1) |
| Median [Min, Max] | 0.0110 [0.0110, 1150] | 0.0840 [0.0110, 540] | 0.0110 [0.0110, 1150] |
| Missing | 157 (3.4%) | 18 (3.7%) | 175 (3.5%) |
| Total Cotinine, urine (ng/mL) | |||
| Mean (SD) | 686 (1830) | 1570 (3240) | 759 (2000) |
| Median [Min, Max] | 0.411 [0.0210, 26800] | 13.8 [0.0210, 24200] | 0.451 [0.0210, 26800] |
| Missing | 3076 (67.6%) | 354 (72.7%) | 3430 (68.1%) |
| Total Hydroxycotinine, urine (ng/mL) | |||
| Mean (SD) | 1350 (4210) | 3140 (6200) | 1500 (4430) |
| Median [Min, Max] | 0.739 [0.0210, 70600] | 20.1 [0.0210, 40400] | 0.829 [0.0210, 70600] |
| Missing | 3076 (67.6%) | 354 (72.7%) | 3430 (68.1%) |
| db | |||
| 0 | 3849 (84.5%) | 364 (74.7%) | 4213 (83.6%) |
| 1 | 704 (15.5%) | 123 (25.3%) | 827 (16.4%) |
| dyslip | |||
| 0 | 1354 (29.7%) | 118 (24.2%) | 1472 (29.2%) |
| 1 | 3199 (70.3%) | 369 (75.8%) | 3568 (70.8%) |
| hyper | |||
| 0 | 2915 (64.0%) | 225 (46.2%) | 3140 (62.3%) |
| 1 | 1638 (36.0%) | 262 (53.8%) | 1900 (37.7%) |
# About the DD variable
ggplot(data = df_new, mapping = aes(dp10, fill = dp10)) + geom_bar(width = 0.5) +
xlab("Depressive Disorder") +
scale_x_discrete(labels = c ("No", "Yes")) +
ylab("Count") +
theme(plot.title = element_text(hjust = 0.5)) +
theme(legend.position = "none") +
geom_text(stat='count', aes(label=..count..), vjust=-0.5) +
geom_text(stat='count', aes(label=paste0(round(after_stat(count/sum(count)*100),1),'%')), vjust=2, color = "black")
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
library(compareGroups)
med_iqr <- compareGroups(dp10 ~ . , data = df_new, method = c(age = 2, finc = 2, PIR = 2, crea = 2, AST = 2, ALT = 2, TG = 2, Glu = 2))
createTable(med_iqr)
##
## --------Summary descriptives table by 'dp10'---------
##
## _________________________________________________________________________________
## 0 1 p.overall
## N=4553 N=487
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## Age in years at screening 48.0 [34.0;63.0] 54.0 [38.0;63.0] 0.001
## sex: <0.001
## 1 2275 (50.0%) 158 (32.4%)
## 2 2278 (50.0%) 329 (67.6%)
## ethnic: <0.001
## 1 607 (13.3%) 67 (13.8%)
## 2 392 (8.61%) 57 (11.7%)
## 3 2006 (44.1%) 216 (44.4%)
## 4 905 (19.9%) 105 (21.6%)
## 6 509 (11.2%) 17 (3.49%)
## 7 134 (2.94%) 25 (5.13%)
## educ: <0.001
## 1 292 (6.41%) 64 (13.1%)
## 2 575 (12.6%) 103 (21.1%)
## 3 1009 (22.2%) 117 (24.0%)
## 4 1442 (31.7%) 148 (30.4%)
## 5 1235 (27.1%) 55 (11.3%)
## masts: <0.001
## 1 2443 (53.7%) 187 (38.4%)
## 2 309 (6.79%) 57 (11.7%)
## 3 484 (10.6%) 98 (20.1%)
## 4 123 (2.70%) 27 (5.54%)
## 5 872 (19.2%) 82 (16.8%)
## 6 322 (7.07%) 36 (7.39%)
## Systolic: Blood pres (1st rdg) mm Hg 123 (17.2) 125 (19.1) 0.003
## Diastolic: Blood pres (1st rdg) mm Hg 69.4 (12.5) 69.7 (12.4) 0.656
## Body Mass Index (kg/m**2) 28.9 (6.87) 31.9 (9.32) <0.001
## Annual family income 8.00 [5.00;14.0] 6.00 [3.00;8.00] <0.001
## Ratio of family income to poverty 2.26 [1.13;4.28] 1.27 [0.78;2.35] <0.001
## Blood urea nitrogen (mg/dL) 13.4 (5.73) 13.6 (9.17) 0.597
## Creatinine (mg/dL) 0.86 [0.72;1.02] 0.82 [0.71;1.01] 0.052
## Aspartate aminotransferase AST (U/L) 22.0 [19.0;27.0] 22.0 [18.5;28.0] 0.467
## Alanine aminotransferase ALT (U/L) 20.0 [16.0;28.0] 20.0 [15.0;28.0] 0.369
## Apolipoprotein (B) (mg/dL) 89.8 (24.6) 95.3 (26.5) 0.004
## Direct HDL-Cholesterol (mg/dL) 52.9 (16.2) 51.2 (15.5) 0.019
## LDL-cholesterol (mg/dL) 111 (34.6) 113 (37.1) 0.359
## Triglyceride (mg/dL) 94.0 [64.0;139] 120 [80.0;183] <0.001
## Total Cholesterol( mg/dL) 189 (41.9) 192 (41.3) 0.090
## Fasting Glucose (mg/dL) 99.0 [92.0;108] 102 [95.0;117] <0.001
## Two Hour Glucose(OGTT) (mg/dL) 118 (50.1) 129 (52.3) 0.016
## ALQ101: 0.872
## 1 3293 (72.3%) 350 (71.9%)
## 2 1260 (27.7%) 137 (28.1%)
## SMQ020: <0.001
## 1 1924 (42.3%) 280 (57.5%)
## 2 2629 (57.7%) 207 (42.5%)
## PAQ665: <0.001
## 1 2009 (44.1%) 123 (25.3%)
## 2 2544 (55.9%) 364 (74.7%)
## PAQ650: <0.001
## 1 1090 (23.9%) 48 (9.86%)
## 2 3463 (76.1%) 439 (90.1%)
## Cotinine, Serum (ng/mL) 56.7 (131) 104 (163) <0.001
## Hematocrit (%) 22.6 (60.9) 45.4 (78.0) <0.001
## Total Cotinine, urine (ng/mL) 686 (1833) 1573 (3242) 0.002
## Total Hydroxycotinine, urine (ng/mL) 1348 (4209) 3143 (6196) 0.001
## db: <0.001
## 0 3849 (84.5%) 364 (74.7%)
## 1 704 (15.5%) 123 (25.3%)
## dyslip: 0.013
## 0 1354 (29.7%) 118 (24.2%)
## 1 3199 (70.3%) 369 (75.8%)
## hyper: <0.001
## 0 2915 (64.0%) 225 (46.2%)
## 1 1638 (36.0%) 262 (53.8%)
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
# We can export this table by the following codes:
table_result <- createTable(med_iqr)
export2word(table_result, file = "tab_result.docx")
# Change the reference category to 'No = 2'
# Those variables are categorical variables with 1: Yes, 2: No
df_new$SMQ020 <- relevel(df_new$SMQ020, ref = 2)
df_new$ALQ101 <- relevel(df_new$ALQ101, ref = 2)
df_new$PAQ665 <- relevel(df_new$PAQ665, ref = 2)
# Calculate OR(95% CI) for serum nicotine metabolites of having DD
library(epiDisplay)
## Loading required package: foreign
## Loading required package: survival
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: nnet
##
## Attaching package: 'epiDisplay'
## The following object is masked from 'package:ggplot2':
##
## alpha
model_sCOT = glm(dp10 ~ sCOT + age + sex + educ + masts + PIR + SMQ020 + ALQ101 + PAQ665 + BMI + SBP + DBP + AST + ALT + Glu + BUN + crea + TC + TG + HDL + LDL + apoB + hyper + db + dyslip, family = binomial, data = df_new)
logistic.display(model_sCOT)
##
## Logistic regression predicting dp10 : 1 vs 0
##
## crude OR(95%CI) adj. OR(95%CI)
## sCOT (cont. var.) 1.0015 (1.0006,1.0024) 1.0011 (1,1.0022)
##
## age (cont. var.) 1.0152 (1.0061,1.0244) 1.0062 (0.9908,1.0217)
##
## sex: 2 vs 1 1.74 (1.26,2.41) 1.81 (1.2,2.73)
##
## educ: ref.=1
## 2 0.66 (0.36,1.19) 0.91 (0.47,1.77)
## 3 0.45 (0.25,0.8) 0.67 (0.35,1.28)
## 4 0.52 (0.3,0.89) 0.88 (0.47,1.65)
## 5 0.2 (0.11,0.38) 0.67 (0.31,1.43)
##
## masts: ref.=1
## 2 2.33 (1.37,3.96) 1.38 (0.74,2.58)
## 3 2.72 (1.77,4.18) 1.83 (1.15,2.93)
## 4 2.42 (1.15,5.13) 1.25 (0.54,2.91)
## 5 0.87 (0.53,1.42) 0.81 (0.47,1.43)
## 6 1.23 (0.65,2.33) 0.88 (0.44,1.77)
##
## PIR (cont. var.) 0.69 (0.61,0.77) 0.74 (0.64,0.85)
##
## SMQ020: 1 vs 2 1.69 (1.23,2.31) 1.2 (0.82,1.75)
##
## ALQ101: 1 vs 2 0.97 (0.69,1.38) 1.25 (0.83,1.89)
##
## PAQ665: 1 vs 2 0.41 (0.29,0.59) 0.55 (0.37,0.8)
##
## BMI (cont. var.) 1.06 (1.04,1.08) 1.05 (1.02,1.07)
##
## SBP (cont. var.) 1.0088 (1.0004,1.0172) 0.9933 (0.9821,1.0046)
##
## DBP (cont. var.) 1.001 (0.9887,1.0135) 0.9996 (0.9853,1.0141)
##
## AST (cont. var.) 1.0026 (0.9985,1.0066) 1.0017 (0.9947,1.0088)
##
## ALT (cont. var.) 1.005 (0.998,1.0121) 1.0027 (0.9906,1.0149)
##
## Glu (cont. var.) 1.0073 (1.0037,1.0109) 1.0022 (0.9968,1.0075)
##
## BUN (cont. var.) 1.0254 (1.0017,1.0497) 1.0089 (0.9772,1.0416)
##
## crea (cont. var.) 1.07 (0.85,1.35) 1.15 (0.82,1.62)
##
## TC (cont. var.) 1 (1,1.01) 0.86 (0.48,1.54)
##
## TG (cont. var.) 1.01 (1,1.01) 1.04 (0.92,1.16)
##
## HDL (cont. var.) 0.99 (0.98,1) 1.18 (0.66,2.11)
##
## LDL (cont. var.) 1 (1,1.01) 1.17 (0.65,2.09)
##
## apoB (cont. var.) 1.009 (1.0029,1.0152) 1.0062 (0.9862,1.0265)
##
## hyper: 1 vs 0 2.24 (1.63,3.07) 1.51 (0.98,2.33)
##
## db: 1 vs 0 1.89 (1.33,2.69) 1.02 (0.61,1.7)
##
## dyslip: 1 vs 0 1.54 (1,2.37) 0.83 (0.48,1.44)
##
## P(Wald's test) P(LR-test)
## sCOT (cont. var.) 0.043 0.046
##
## age (cont. var.) 0.433 0.433
##
## sex: 2 vs 1 0.005 0.004
##
## educ: ref.=1 0.565
## 2 0.792
## 3 0.226
## 4 0.698
## 5 0.301
##
## masts: ref.=1 0.128
## 2 0.305
## 3 0.011
## 4 0.597
## 5 0.473
## 6 0.717
##
## PIR (cont. var.) < 0.001 < 0.001
##
## SMQ020: 1 vs 2 0.36 0.359
##
## ALQ101: 1 vs 2 0.289 0.285
##
## PAQ665: 1 vs 2 0.002 0.001
##
## BMI (cont. var.) < 0.001 < 0.001
##
## SBP (cont. var.) 0.245 0.242
##
## DBP (cont. var.) 0.956 0.956
##
## AST (cont. var.) 0.628 0.639
##
## ALT (cont. var.) 0.663 0.666
##
## Glu (cont. var.) 0.426 0.432
##
## BUN (cont. var.) 0.586 0.586
##
## crea (cont. var.) 0.426 0.481
##
## TC (cont. var.) 0.604 0.603
##
## TG (cont. var.) 0.553 0.553
##
## HDL (cont. var.) 0.583 0.583
##
## LDL (cont. var.) 0.605 0.605
##
## apoB (cont. var.) 0.548 0.545
##
## hyper: 1 vs 0 0.061 0.06
##
## db: 1 vs 0 0.948 0.948
##
## dyslip: 1 vs 0 0.507 0.51
##
## Log-likelihood = -500.4239
## No. of observations = 1928
## AIC value = 1066.8478
library(caret)
## Loading required package: lattice
##
## Attaching package: 'lattice'
## The following object is masked from 'package:epiDisplay':
##
## dotplot
##
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
##
## cluster
## The following object is masked from 'package:purrr':
##
## lift
varImp(model_sCOT, scale = F)
## Overall
## sCOT 2.01999514
## age 0.78438767
## sex2 2.83575260
## educ2 0.26414365
## educ3 1.21046989
## educ4 0.38791087
## educ5 1.03330527
## masts2 1.02528073
## masts3 2.53416477
## masts4 0.52903883
## masts5 0.71750870
## masts6 0.36250422
## PIR 4.33892121
## SMQ0201 0.91620086
## ALQ1011 1.06073134
## PAQ6651 3.11294767
## BMI 3.93677354
## SBP 1.16200658
## DBP 0.05575828
## AST 0.48484735
## ALT 0.43638549
## Glu 0.79669306
## BUN 0.54464348
## crea 0.79544683
## TC 0.51926108
## TG 0.59329743
## HDL 0.54882278
## LDL 0.51744607
## apoB 0.60068615
## hyper1 1.87501904
## db1 0.06471414
## dyslip1 0.66285767
coef_abs <- abs(coef(model_sCOT))
# Sorting coefficients and names in ascending order
sorted_coef_abs <- sort(coef_abs)
sorted_names <- names(coef_abs)[order(coef_abs)]
# Creating a bar plot in ascending order of absolute coefficients
barplot(sorted_coef_abs, names.arg = sorted_names, horiz = TRUE, las = 1, main = "Variable Importance - by coefficients")
# Assuming ‘model_logistic’ is logistic regression model object
summary_model <- summary(model_sCOT)
# Extracting p-values
p_values <- summary_model$coefficients[, "Pr(>|z|)"]
# Sorting p-values and names in ascending order
sorted_p_values <- sort(p_values)
sorted_names <- names(p_values)[order(p_values)]
#sorted_names <- names(p_values)[order(p_values, increasing = TRUE)]
# Creating a bar plot in ascending order of -log(p-values)
barplot(-log(sorted_p_values), names.arg = sorted_names, horiz = TRUE, las = 1, main = "Variable Significance - based on p-value")
library(BMA)
## Loading required package: leaps
## Loading required package: robustbase
##
## Attaching package: 'robustbase'
## The following object is masked from 'package:survival':
##
## heart
## Loading required package: inline
## Loading required package: rrcov
## Scalable Robust Estimators with High Breakdown Point (version 1.7-2)
bma <- bic.glm(dp10 ~ sCOT + age + sex + educ + masts + PIR + SMQ020 + ALQ101 + PAQ665 + BMI + SBP + DBP + AST + ALT + Glu + BUN + crea + TC + TG + HDL + LDL + apoB + hyper + db + dyslip, data = df_new, glm.family = "binomial")
summary(bma)
##
## Call:
## bic.glm.formula(f = dp10 ~ sCOT + age + sex + educ + masts + PIR + SMQ020 + ALQ101 + PAQ665 + BMI + SBP + DBP + AST + ALT + Glu + BUN + crea + TC + TG + HDL + LDL + apoB + hyper + db + dyslip, data = df_new, glm.family = "binomial")
##
##
## 39 models were selected
## Best 5 models (cumulative posterior probability = 0.4732 ):
##
## p!=0 EV SD model 1 model 2 model 3
## Intercept 100 -2.393e+00 0.7654907 -1.797e+00 -2.127e+00 -2.868e+00
## sCOT.x 39.3 1.027e-03 0.0014281 . 2.461e-03 2.986e-03
## age.x 11.4 2.317e-03 0.0074124 . . .
## sex.x 0.9
## .2 6.308e-03 0.0760977 . . .
## educ.x 0.0
## .2 0.000e+00 0.0000000 . . .
## .3 0.000e+00 0.0000000 . . .
## .4 0.000e+00 0.0000000 . . .
## .5 0.000e+00 0.0000000 . . .
## masts.x 0.0
## .2 0.000e+00 0.0000000 . . .
## .3 0.000e+00 0.0000000 . . .
## .4 0.000e+00 0.0000000 . . .
## .5 0.000e+00 0.0000000 . . .
## .6 0.000e+00 0.0000000 . . .
## PIR.x 84.4 -3.111e-01 0.1819668 -3.788e-01 -3.284e-01 .
## SMQ020.x 5.2
## .1 3.536e-02 0.1740056 . . .
## ALQ101.x 1.1
## .1 3.275e-03 0.0538075 . . .
## PAQ665.x 10.9
## .1 -7.522e-02 0.2538085 . . .
## BMI.x 3.8 1.211e-03 0.0076648 . . .
## SBP.x 1.1 -9.262e-05 0.0015120 . . .
## DBP.x 0.9 6.926e-05 0.0015494 . . .
## AST.x 0.0 0.000e+00 0.0000000 . . .
## ALT.x 1.0 -1.103e-04 0.0018971 . . .
## Glu.x 0.9 3.742e-05 0.0007259 . . .
## BUN.x 2.6 9.633e-04 0.0077357 . . .
## crea.x 9.8 1.463e-01 0.5040639 . . .
## TC.x 3.0 1.567e-04 0.0011420 . . .
## TG.x 2.6 7.461e-05 0.0005997 . . .
## HDL.x 0.0 0.000e+00 0.0000000 . . .
## LDL.x 2.5 1.278e-04 0.0010852 . . .
## apoB.x 2.2 1.524e-04 0.0014293 . . .
## hyper.x 2.8
## .1 1.261e-02 0.0965026 . . .
## db.x 0.0
## .1 0.000e+00 0.0000000 . . .
## dyslip.x 1.1
## .1 3.935e-03 0.0616649 . . .
##
## nVar 1 2 1
## BIC -2.786e+03 -2.785e+03 -2.784e+03
## post prob 0.177 0.121 0.070
## model 4 model 5
## Intercept -3.071e+00 -2.672e+00
## sCOT.x . .
## age.x . 1.968e-02
## sex.x
## .2 . .
## educ.x
## .2 . .
## .3 . .
## .4 . .
## .5 . .
## masts.x
## .2 . .
## .3 . .
## .4 . .
## .5 . .
## .6 . .
## PIR.x -4.027e-01 -4.202e-01
## SMQ020.x
## .1 . .
## ALQ101.x
## .1 . .
## PAQ665.x
## .1 . .
## BMI.x . .
## SBP.x . .
## DBP.x . .
## AST.x . .
## ALT.x . .
## Glu.x . .
## BUN.x . .
## crea.x 1.492e+00 .
## TC.x . .
## TG.x . .
## HDL.x . .
## LDL.x . .
## apoB.x . .
## hyper.x
## .1 . .
## db.x
## .1 . .
## dyslip.x
## .1 . .
##
## nVar 2 2
## BIC -2.784e+03 -2.783e+03
## post prob 0.058 0.047
##
## 4550 observations deleted due to missingness.
imageplot.bma(bma)
## 7.2 For sHCOT
model_sHCOT = glm(dp10 ~ sHCOT + age + sex + educ + masts + PIR + SMQ020 + ALQ101 + PAQ665 + BMI + SBP + DBP + AST + ALT + Glu + BUN + crea + TC + TG + HDL + LDL + apoB + hyper + db + dyslip, family = binomial, data = df_new)
logistic.display(model_sHCOT)
##
## Logistic regression predicting dp10 : 1 vs 0
##
## crude OR(95%CI) adj. OR(95%CI)
## sHCOT (cont. var.) 1.0031 (1.0012,1.0049) 1.0016 (0.9995,1.0037)
##
## age (cont. var.) 1.0152 (1.0061,1.0244) 1.0058 (0.9905,1.0213)
##
## sex: 2 vs 1 1.74 (1.26,2.41) 1.79 (1.19,2.69)
##
## educ: ref.=1
## 2 0.66 (0.36,1.19) 0.94 (0.48,1.81)
## 3 0.45 (0.25,0.8) 0.68 (0.35,1.3)
## 4 0.52 (0.3,0.89) 0.89 (0.47,1.66)
## 5 0.2 (0.11,0.38) 0.66 (0.31,1.42)
##
## masts: ref.=1
## 2 2.33 (1.37,3.96) 1.38 (0.74,2.57)
## 3 2.72 (1.77,4.18) 1.84 (1.15,2.94)
## 4 2.42 (1.15,5.13) 1.26 (0.54,2.93)
## 5 0.87 (0.53,1.42) 0.84 (0.48,1.47)
## 6 1.23 (0.65,2.33) 0.88 (0.44,1.77)
##
## PIR (cont. var.) 0.69 (0.61,0.77) 0.74 (0.64,0.85)
##
## SMQ020: 1 vs 2 1.69 (1.23,2.31) 1.26 (0.86,1.82)
##
## ALQ101: 1 vs 2 0.97 (0.69,1.38) 1.25 (0.83,1.89)
##
## PAQ665: 1 vs 2 0.41 (0.29,0.59) 0.54 (0.37,0.8)
##
## BMI (cont. var.) 1.06 (1.04,1.08) 1.05 (1.02,1.07)
##
## SBP (cont. var.) 1.0088 (1.0004,1.0172) 0.9936 (0.9824,1.0049)
##
## DBP (cont. var.) 1.001 (0.9887,1.0135) 0.9998 (0.9855,1.0144)
##
## AST (cont. var.) 1.0026 (0.9985,1.0066) 1.0016 (0.9946,1.0087)
##
## ALT (cont. var.) 1.005 (0.998,1.0121) 1.0027 (0.9906,1.0149)
##
## Glu (cont. var.) 1.0073 (1.0037,1.0109) 1.0021 (0.9968,1.0075)
##
## BUN (cont. var.) 1.0254 (1.0017,1.0497) 1.007 (0.9755,1.0396)
##
## crea (cont. var.) 1.07 (0.85,1.35) 1.16 (0.82,1.62)
##
## TC (cont. var.) 1 (1,1.01) 0.86 (0.48,1.55)
##
## TG (cont. var.) 1.01 (1,1.01) 1.03 (0.92,1.16)
##
## HDL (cont. var.) 0.99 (0.98,1) 1.17 (0.65,2.1)
##
## LDL (cont. var.) 1 (1,1.01) 1.16 (0.65,2.08)
##
## apoB (cont. var.) 1.009 (1.0029,1.0152) 1.0059 (0.9861,1.0262)
##
## hyper: 1 vs 0 2.24 (1.63,3.07) 1.51 (0.98,2.33)
##
## db: 1 vs 0 1.89 (1.33,2.69) 1.02 (0.61,1.7)
##
## dyslip: 1 vs 0 1.54 (1,2.37) 0.83 (0.48,1.44)
##
## P(Wald's test) P(LR-test)
## sHCOT (cont. var.) 0.127 0.131
##
## age (cont. var.) 0.461 0.462
##
## sex: 2 vs 1 0.005 0.005
##
## educ: ref.=1 0.55
## 2 0.842
## 3 0.238
## 4 0.711
## 5 0.292
##
## masts: ref.=1 0.139
## 2 0.31
## 3 0.011
## 4 0.591
## 5 0.538
## 6 0.719
##
## PIR (cont. var.) < 0.001 < 0.001
##
## SMQ020: 1 vs 2 0.233 0.232
##
## ALQ101: 1 vs 2 0.286 0.282
##
## PAQ665: 1 vs 2 0.002 0.001
##
## BMI (cont. var.) < 0.001 < 0.001
##
## SBP (cont. var.) 0.266 0.263
##
## DBP (cont. var.) 0.983 0.983
##
## AST (cont. var.) 0.652 0.662
##
## ALT (cont. var.) 0.667 0.67
##
## Glu (cont. var.) 0.434 0.44
##
## BUN (cont. var.) 0.667 0.667
##
## crea (cont. var.) 0.402 0.46
##
## TC (cont. var.) 0.621 0.62
##
## TG (cont. var.) 0.571 0.571
##
## HDL (cont. var.) 0.602 0.602
##
## LDL (cont. var.) 0.621 0.621
##
## apoB (cont. var.) 0.56 0.557
##
## hyper: 1 vs 0 0.061 0.06
##
## db: 1 vs 0 0.939 0.939
##
## dyslip: 1 vs 0 0.499 0.502
##
## Log-likelihood = -501.2713
## No. of observations = 1928
## AIC value = 1068.5426
varImp(model_sHCOT, scale = F)
## Overall
## sHCOT 1.52529694
## age 0.73692454
## sex2 2.77875472
## educ2 0.19912730
## educ3 1.17880130
## educ4 0.37087006
## educ5 1.05454017
## masts2 1.01596023
## masts3 2.54859147
## masts4 0.53741452
## masts5 0.61604698
## masts6 0.35975895
## PIR 4.33662888
## SMQ0201 1.19348681
## ALQ1011 1.06744602
## PAQ6651 3.13759703
## BMI 3.90324054
## SBP 1.11169101
## DBP 0.02181817
## AST 0.45095400
## ALT 0.43060147
## Glu 0.78319739
## BUN 0.43079560
## crea 0.83783152
## TC 0.49504369
## TG 0.56676855
## HDL 0.52137091
## LDL 0.49431652
## apoB 0.58260420
## hyper1 1.87448866
## db1 0.07699293
## dyslip1 0.67609710
summary_model <- summary(model_sHCOT)
# Extracting p-values
p_values <- summary_model$coefficients[, "Pr(>|z|)"]
# Sorting p-values and names in ascending order
sorted_p_values <- sort(p_values)
sorted_names <- names(p_values)[order(p_values)]
# Creating a bar plot in ascending order of -log(p-values)
barplot(-log(sorted_p_values), names.arg = sorted_names, horiz = TRUE, las = 1, main = "Variable Significance - based on p-value")
## 7.3 For uCOT
model_uCOT = glm(dp10 ~ uCOT + age + sex + educ + masts + PIR + SMQ020 + ALQ101 + PAQ665 + BMI + SBP + DBP + AST + ALT + Glu + BUN + crea + TC + TG + HDL + LDL + apoB + hyper + db + dyslip, family = binomial, data = df_new)
logistic.display(model_uCOT)
##
## Logistic regression predicting dp10 : 1 vs 0
##
## crude OR(95%CI) adj. OR(95%CI) P(Wald's test)
## uCOT (cont. var.) 1.0002 (1.0001,1.0003) 1.0002 (1,1.0003) 0.015
##
## age (cont. var.) 1.02 (1,1.03) 1.02 (0.99,1.06) 0.129
##
## sex: 2 vs 1 1.4 (0.76,2.57) 1.66 (0.7,3.96) 0.251
##
## educ: ref.=1
## 2 0.62 (0.18,2.16) 0.7 (0.17,2.84) 0.614
## 3 0.65 (0.21,2.01) 0.79 (0.21,2.95) 0.724
## 4 0.72 (0.25,2.05) 1.32 (0.38,4.66) 0.663
## 5 0.28 (0.08,0.98) 0.98 (0.22,4.38) 0.981
##
## masts: ref.=1
## 2 2.49 (0.78,7.87) 1.28 (0.31,5.23) 0.732
## 3 3.87 (1.76,8.51) 2.6 (1.04,6.48) 0.041
## 4 3.73 (1.14,12.17) 1.66 (0.37,7.34) 0.506
## 5 0.7 (0.26,1.94) 0.63 (0.2,1.97) 0.425
## 6 1.17 (0.33,4.14) 0.72 (0.17,3.05) 0.651
##
## PIR (cont. var.) 0.65 (0.52,0.82) 0.61 (0.45,0.83) 0.001
##
## SMQ020: 1 vs 2 1.85 (1,3.4) 1.18 (0.54,2.56) 0.674
##
## ALQ101: 1 vs 2 1.13 (0.56,2.28) 2.15 (0.88,5.24) 0.093
##
## PAQ665: 1 vs 2 0.58 (0.3,1.11) 0.68 (0.32,1.43) 0.306
##
## BMI (cont. var.) 1.03 (0.99,1.07) 1.04 (0.98,1.09) 0.176
##
## SBP (cont. var.) 0.99 (0.98,1.01) 0.97 (0.94,1) 0.025
##
## DBP (cont. var.) 1 (0.98,1.03) 1.02 (0.99,1.05) 0.3
##
## AST (cont. var.) 1.01 (0.99,1.04) 1.03 (0.98,1.08) 0.267
##
## ALT (cont. var.) 1 (0.98,1.02) 0.99 (0.95,1.03) 0.612
##
## Glu (cont. var.) 1.0022 (0.9939,1.0104) 0.9963 (0.9827,1.01) 0.592
##
## BUN (cont. var.) 1.04 (0.99,1.09) 1.02 (0.95,1.08) 0.654
##
## crea (cont. var.) 1.92 (0.95,3.85) 1.79 (0.64,5.03) 0.267
##
## TC (cont. var.) 1 (1,1.01) 0.43 (0.13,1.47) 0.179
##
## TG (cont. var.) 1 (1,1.01) 1.18 (0.93,1.51) 0.176
##
## HDL (cont. var.) 0.99 (0.97,1.01) 2.31 (0.68,7.82) 0.178
##
## LDL (cont. var.) 1 (1,1.01) 2.34 (0.69,7.91) 0.172
##
## apoB (cont. var.) 1.0052 (0.9936,1.017) 0.9923 (0.9505,1.036) 0.726
##
## hyper: 1 vs 0 1.63 (0.89,2.98) 1.27 (0.52,3.11) 0.605
##
## db: 1 vs 0 1.29 (0.6,2.77) 1.36 (0.42,4.39) 0.61
##
## dyslip: 1 vs 0 1.06 (0.5,2.26) 0.46 (0.15,1.4) 0.172
##
## P(LR-test)
## uCOT (cont. var.) 0.009
##
## age (cont. var.) 0.13
##
## sex: 2 vs 1 0.25
##
## educ: ref.=1 0.751
## 2
## 3
## 4
## 5
##
## masts: ref.=1 0.269
## 2
## 3
## 4
## 5
## 6
##
## PIR (cont. var.) < 0.001
##
## SMQ020: 1 vs 2 0.674
##
## ALQ101: 1 vs 2 0.082
##
## PAQ665: 1 vs 2 0.3
##
## BMI (cont. var.) 0.182
##
## SBP (cont. var.) 0.019
##
## DBP (cont. var.) 0.28
##
## AST (cont. var.) 0.279
##
## ALT (cont. var.) 0.604
##
## Glu (cont. var.) 0.578
##
## BUN (cont. var.) 0.656
##
## crea (cont. var.) 0.264
##
## TC (cont. var.) 0.174
##
## TG (cont. var.) 0.171
##
## HDL (cont. var.) 0.173
##
## LDL (cont. var.) 0.167
##
## apoB (cont. var.) 0.728
##
## hyper: 1 vs 0 0.605
##
## db: 1 vs 0 0.614
##
## dyslip: 1 vs 0 0.177
##
## Log-likelihood = -131.6823
## No. of observations = 607
## AIC value = 329.3646
#############
summary_model <- summary(model_uCOT)
# Extracting p-values
p_values <- summary_model$coefficients[, "Pr(>|z|)"]
# Sorting p-values and names in ascending order
sorted_p_values <- sort(p_values)
sorted_names <- names(p_values)[order(p_values)]
# Creating a bar plot in ascending order of -log(p-values)
barplot(-log(sorted_p_values), names.arg = sorted_names, horiz = TRUE, las = 1, main = "Variable Significance - based on p-value")
##################
bma <- bic.glm(dp10 ~ uCOT + age + sex + educ + masts + PIR + SMQ020 + ALQ101 + PAQ665 + BMI + SBP + DBP + AST + ALT + Glu + BUN + crea + TC + TG + HDL + LDL + apoB + hyper + db + dyslip, data = df_new, glm.family = "binomial")
summary(bma)
##
## Call:
## bic.glm.formula(f = dp10 ~ uCOT + age + sex + educ + masts + PIR + SMQ020 + ALQ101 + PAQ665 + BMI + SBP + DBP + AST + ALT + Glu + BUN + crea + TC + TG + HDL + LDL + apoB + hyper + db + dyslip, data = df_new, glm.family = "binomial")
##
##
## 24 models were selected
## Best 5 models (cumulative posterior probability = 0.6006 ):
##
## p!=0 EV SD model 1 model 2 model 3
## Intercept 100 -2.498e+00 7.498e-01 -2.096e+00 -3.182e+00 -2.848e+00
## uCOT.x 86.5 1.805e-04 9.401e-05 2.046e-04 2.249e-04 2.151e-04
## age.x 15.8 3.592e-03 9.393e-03 . 2.369e-02 .
## sex.x 1.5
## .2 3.188e-03 5.144e-02 . . .
## educ.x 0.0
## .2 0.000e+00 0.000e+00 . . .
## .3 0.000e+00 0.000e+00 . . .
## .4 0.000e+00 0.000e+00 . . .
## .5 0.000e+00 0.000e+00 . . .
## masts.x 0.0
## .2 0.000e+00 0.000e+00 . . .
## .3 0.000e+00 0.000e+00 . . .
## .4 0.000e+00 0.000e+00 . . .
## .5 0.000e+00 0.000e+00 . . .
## .6 0.000e+00 0.000e+00 . . .
## PIR.x 87.5 -3.200e-01 1.737e-01 -3.528e-01 -3.967e-01 .
## SMQ020.x 3.0
## .1 1.410e-02 1.089e-01 . . .
## ALQ101.x 1.5
## .1 3.808e-03 6.072e-02 . . .
## PAQ665.x 8.9
## .1 -6.197e-02 2.327e-01 . . .
## BMI.x 3.2 1.103e-03 7.374e-03 . . .
## SBP.x 1.5 -1.199e-04 1.779e-03 . . .
## DBP.x 0.0 0.000e+00 0.000e+00 . . .
## AST.x 0.0 0.000e+00 0.000e+00 . . .
## ALT.x 0.0 0.000e+00 0.000e+00 . . .
## Glu.x 1.6 8.506e-05 1.041e-03 . . .
## BUN.x 2.0 6.493e-04 6.431e-03 . . .
## crea.x 8.0 1.131e-01 4.363e-01 . . .
## TC.x 2.7 1.450e-04 1.111e-03 . . .
## TG.x 2.5 7.504e-05 6.039e-04 . . .
## HDL.x 0.0 0.000e+00 0.000e+00 . . .
## LDL.x 2.0 9.343e-05 9.407e-04 . . .
## apoB.x 1.8 1.072e-04 1.218e-03 . . .
## hyper.x 2.3
## .1 9.774e-03 8.500e-02 . . .
## db.x 0.0
## .1 0.000e+00 0.000e+00 . . .
## dyslip.x 0.0
## .1 0.000e+00 0.000e+00 . . .
##
## nVar 2 3 1
## BIC -2.789e+03 -2.787e+03 -2.786e+03
## post prob 0.268 0.124 0.084
## model 4 model 5
## Intercept -1.797e+00 -3.259e+00
## uCOT.x . 1.963e-04
## age.x . .
## sex.x
## .2 . .
## educ.x
## .2 . .
## .3 . .
## .4 . .
## .5 . .
## masts.x
## .2 . .
## .3 . .
## .4 . .
## .5 . .
## .6 . .
## PIR.x -3.788e-01 -3.799e-01
## SMQ020.x
## .1 . .
## ALQ101.x
## .1 . .
## PAQ665.x
## .1 . .
## BMI.x . .
## SBP.x . .
## DBP.x . .
## AST.x . .
## ALT.x . .
## Glu.x . .
## BUN.x . .
## crea.x . 1.381e+00
## TC.x . .
## TG.x . .
## HDL.x . .
## LDL.x . .
## apoB.x . .
## hyper.x
## .1 . .
## db.x
## .1 . .
## dyslip.x
## .1 . .
##
## nVar 1 3
## BIC -2.786e+03 -2.786e+03
## post prob 0.066 0.059
##
## 4550 observations deleted due to missingness.
imageplot.bma(bma)
model_uHCOT = glm(dp10 ~ uHCOT + age + sex + educ + masts + PIR + SMQ020 + ALQ101 + PAQ665 + BMI + SBP + DBP + AST + ALT + Glu + BUN + crea + TC + TG + HDL + LDL + apoB + hyper + db + dyslip, family = binomial, data = df_new)
logistic.display(model_uHCOT)
##
## Logistic regression predicting dp10 : 1 vs 0
##
## crude OR(95%CI) adj. OR(95%CI)
## uHCOT (cont. var.) 1 (1,1.0001) 1 (1,1.0001)
##
## age (cont. var.) 1.02 (1,1.03) 1.02 (0.99,1.05)
##
## sex: 2 vs 1 1.4 (0.76,2.57) 1.55 (0.65,3.66)
##
## educ: ref.=1
## 2 0.62 (0.18,2.16) 0.73 (0.18,2.98)
## 3 0.65 (0.21,2.01) 0.9 (0.25,3.32)
## 4 0.72 (0.25,2.05) 1.39 (0.4,4.9)
## 5 0.28 (0.08,0.98) 1.01 (0.23,4.54)
##
## masts: ref.=1
## 2 2.49 (0.78,7.87) 1.28 (0.31,5.22)
## 3 3.87 (1.76,8.51) 2.65 (1.07,6.56)
## 4 3.73 (1.14,12.17) 1.75 (0.4,7.66)
## 5 0.7 (0.26,1.94) 0.62 (0.2,1.93)
## 6 1.17 (0.33,4.14) 0.73 (0.17,3.11)
##
## PIR (cont. var.) 0.65 (0.52,0.82) 0.61 (0.45,0.82)
##
## SMQ020: 1 vs 2 1.85 (1,3.4) 1.31 (0.62,2.78)
##
## ALQ101: 1 vs 2 1.13 (0.56,2.28) 1.97 (0.83,4.71)
##
## PAQ665: 1 vs 2 0.58 (0.3,1.11) 0.68 (0.33,1.43)
##
## BMI (cont. var.) 1.03 (0.99,1.07) 1.03 (0.98,1.09)
##
## SBP (cont. var.) 0.99 (0.98,1.01) 0.97 (0.94,0.99)
##
## DBP (cont. var.) 1 (0.98,1.03) 1.02 (0.99,1.05)
##
## AST (cont. var.) 1.01 (0.99,1.04) 1.03 (0.98,1.08)
##
## ALT (cont. var.) 1 (0.98,1.02) 0.99 (0.95,1.03)
##
## Glu (cont. var.) 1.0022 (0.9939,1.0104) 0.9965 (0.983,1.0102)
##
## BUN (cont. var.) 1.04 (0.99,1.09) 1.02 (0.95,1.09)
##
## crea (cont. var.) 1.92 (0.95,3.85) 1.67 (0.6,4.61)
##
## TC (cont. var.) 1 (1,1.01) 0.38 (0.11,1.29)
##
## TG (cont. var.) 1 (1,1.01) 1.21 (0.95,1.54)
##
## HDL (cont. var.) 0.99 (0.97,1.01) 2.59 (0.77,8.71)
##
## LDL (cont. var.) 1 (1,1.01) 2.64 (0.79,8.84)
##
## apoB (cont. var.) 1.0052 (0.9936,1.017) 0.9909 (0.9497,1.034)
##
## hyper: 1 vs 0 1.63 (0.89,2.98) 1.36 (0.56,3.3)
##
## db: 1 vs 0 1.29 (0.6,2.77) 1.32 (0.41,4.29)
##
## dyslip: 1 vs 0 1.06 (0.5,2.26) 0.48 (0.16,1.46)
##
## P(Wald's test) P(LR-test)
## uHCOT (cont. var.) 0.064 0.073
##
## age (cont. var.) 0.19 0.191
##
## sex: 2 vs 1 0.319 0.318
##
## educ: ref.=1 0.787
## 2 0.657
## 3 0.88
## 4 0.607
## 5 0.985
##
## masts: ref.=1 0.235
## 2 0.733
## 3 0.035
## 4 0.456
## 5 0.406
## 6 0.676
##
## PIR (cont. var.) 0.001 < 0.001
##
## SMQ020: 1 vs 2 0.48 0.479
##
## ALQ101: 1 vs 2 0.127 0.115
##
## PAQ665: 1 vs 2 0.309 0.303
##
## BMI (cont. var.) 0.222 0.227
##
## SBP (cont. var.) 0.019 0.014
##
## DBP (cont. var.) 0.284 0.263
##
## AST (cont. var.) 0.276 0.288
##
## ALT (cont. var.) 0.588 0.58
##
## Glu (cont. var.) 0.613 0.601
##
## BUN (cont. var.) 0.593 0.596
##
## crea (cont. var.) 0.327 0.325
##
## TC (cont. var.) 0.122 0.117
##
## TG (cont. var.) 0.121 0.116
##
## HDL (cont. var.) 0.123 0.118
##
## LDL (cont. var.) 0.116 0.111
##
## apoB (cont. var.) 0.675 0.678
##
## hyper: 1 vs 0 0.496 0.496
##
## db: 1 vs 0 0.643 0.646
##
## dyslip: 1 vs 0 0.196 0.201
##
## Log-likelihood = -133.5403
## No. of observations = 607
## AIC value = 333.0805
#############
summary_model <- summary(model_uHCOT)
# Extracting p-values
p_values <- summary_model$coefficients[, "Pr(>|z|)"]
# Sorting p-values and names in ascending order
sorted_p_values <- sort(p_values)
sorted_names <- names(p_values)[order(p_values)]
# Creating a bar plot in ascending order of -log(p-values)
barplot(-log(sorted_p_values), names.arg = sorted_names, horiz = TRUE, las = 1, main = "Variable Significance - based on p-value")
##################
bma <- bic.glm(dp10 ~ uHCOT + age + sex + educ + masts + PIR + SMQ020 + ALQ101 + PAQ665 + BMI + SBP + DBP + AST + ALT + Glu + BUN + crea + TC + TG + HDL + LDL + apoB + hyper + db + dyslip, data = df_new, glm.family = "binomial")
summary(bma)
##
## Call:
## bic.glm.formula(f = dp10 ~ uHCOT + age + sex + educ + masts + PIR + SMQ020 + ALQ101 + PAQ665 + BMI + SBP + DBP + AST + ALT + Glu + BUN + crea + TC + TG + HDL + LDL + apoB + hyper + db + dyslip, data = df_new, glm.family = "binomial")
##
##
## 29 models were selected
## Best 5 models (cumulative posterior probability = 0.5264 ):
##
## p!=0 EV SD model 1 model 2 model 3
## Intercept 100 -2.238e+00 7.389e-01 -1.797e+00 -1.962e+00 -3.071e+00
## uHCOT.x 20.5 1.447e-05 3.148e-05 . 7.007e-05 .
## age.x 11.0 2.217e-03 7.210e-03 . . .
## sex.x 1.2
## .2 8.262e-03 8.700e-02 . . .
## educ.x 0.0
## .2 0.000e+00 0.000e+00 . . .
## .3 0.000e+00 0.000e+00 . . .
## .4 0.000e+00 0.000e+00 . . .
## .5 0.000e+00 0.000e+00 . . .
## masts.x 0.0
## .2 0.000e+00 0.000e+00 . . .
## .3 0.000e+00 0.000e+00 . . .
## .4 0.000e+00 0.000e+00 . . .
## .5 0.000e+00 0.000e+00 . . .
## .6 0.000e+00 0.000e+00 . . .
## PIR.x 91.9 -3.522e-01 1.643e-01 -3.788e-01 -3.716e-01 -4.027e-01
## SMQ020.x 6.7
## .1 4.631e-02 1.979e-01 . . .
## ALQ101.x 1.4
## .1 4.290e-03 6.155e-02 . . .
## PAQ665.x 12.0
## .1 -8.559e-02 2.709e-01 . . .
## BMI.x 1.8 4.309e-04 4.450e-03 . . .
## SBP.x 1.4 -1.213e-04 1.729e-03 . . .
## DBP.x 1.2 9.073e-05 1.773e-03 . . .
## AST.x 0.0 0.000e+00 0.000e+00 . . .
## ALT.x 1.4 -1.445e-04 2.170e-03 . . .
## Glu.x 1.2 4.902e-05 8.304e-04 . . .
## BUN.x 1.8 6.015e-04 6.063e-03 . . .
## crea.x 12.6 1.913e-01 5.709e-01 . . 1.492e+00
## TC.x 2.3 1.188e-04 9.982e-04 . . .
## TG.x 1.9 5.168e-05 4.970e-04 . . .
## HDL.x 0.0 0.000e+00 0.000e+00 . . .
## LDL.x 2.0 1.047e-04 9.865e-04 . . .
## apoB.x 1.7 1.174e-04 1.258e-03 . . .
## hyper.x 2.5
## .1 1.179e-02 9.361e-02 . . .
## db.x 0.0
## .1 0.000e+00 0.000e+00 . . .
## dyslip.x 1.4
## .1 5.155e-03 7.053e-02 . . .
##
## nVar 1 2 2
## BIC -2.786e+03 -2.784e+03 -2.784e+03
## post prob 0.232 0.103 0.076
## model 4 model 5
## Intercept -2.672e+00 -1.588e+00
## uHCOT.x . .
## age.x 1.968e-02 .
## sex.x
## .2 . .
## educ.x
## .2 . .
## .3 . .
## .4 . .
## .5 . .
## masts.x
## .2 . .
## .3 . .
## .4 . .
## .5 . .
## .6 . .
## PIR.x -4.202e-01 -3.609e-01
## SMQ020.x
## .1 . .
## ALQ101.x
## .1 . .
## PAQ665.x
## .1 . -7.049e-01
## BMI.x . .
## SBP.x . .
## DBP.x . .
## AST.x . .
## ALT.x . .
## Glu.x . .
## BUN.x . .
## crea.x . .
## TC.x . .
## TG.x . .
## HDL.x . .
## LDL.x . .
## apoB.x . .
## hyper.x
## .1 . .
## db.x
## .1 . .
## dyslip.x
## .1 . .
##
## nVar 2 2
## BIC -2.783e+03 -2.783e+03
## post prob 0.062 0.055
##
## 4550 observations deleted due to missingness.
imageplot.bma(bma)
library(Epi)
ROC(form = dp10 ~ sCOT + age + sex + educ + masts + PIR + SMQ020 + ALQ101 + PAQ665 + BMI + SBP + DBP + AST + ALT + Glu + BUN + crea + TC + TG + HDL + LDL + apoB + hyper + db + dyslip, data=df_new, plot = "ROC", lwd=2)
ROC(form = dp10 ~ sHCOT + age + sex + educ + masts + PIR + SMQ020 + ALQ101 + PAQ665 + BMI + SBP + DBP + AST + ALT + Glu + BUN + crea + TC + TG + HDL + LDL + apoB + hyper + db + dyslip, data=df_new, plot = "ROC", lwd=2)
ROC(form = dp10 ~ uCOT + age + sex + educ + masts + PIR + SMQ020 + ALQ101 + PAQ665 + BMI + SBP + DBP + AST + ALT + Glu + BUN + crea + TC + TG + HDL + LDL + apoB + hyper + db + dyslip, data=df_new, plot = "ROC", lwd=2)
ROC(form = dp10 ~ uHCOT + age + sex + educ + masts + PIR + SMQ020 + ALQ101 + PAQ665 + BMI + SBP + DBP + AST + ALT + Glu + BUN + crea + TC + TG + HDL + LDL + apoB + hyper + db + dyslip, data=df_new, plot = "ROC", lwd=2)