library("poLCA");library(tidyverse);library(corrr);library(dplyr); library(ggplot2); library(reshape2);library(gtsummary);library(nnet);library(pROC)
Type 'citation("pROC")' for a citation.
Attaching package: ‘pROC’
The following objects are masked from ‘package:stats’:
cov, smooth, var
#clean gender sex
non_matching_gender <- which(df0$ED_GenderBirthCert != df0$ED_GenderNow)
print("Values at non-matching indices:");
[1] "Values at non-matching indices:"
print(df0[non_matching_gender, c("ED_GenderBirthCert", "ED_GenderNow")])
#There are 18 transgender patients. I'm going to remove them bc we don;t know if they have used HRT
#clean gender
dfb <- df0[df0$ED_GenderBirthCert == df0$ED_GenderNow, ]
dfb <- dfb[,c(7:11,289)]
unique(dfb$ED_GenderNow)
[1] "FEMALE" "MALE" NA
#clean marital status
table(dfb$ED_Marital)
-5 -7 -8 Annulled Divorced Married Never been married
4 21 6 6 516 936 2950
Separated Widowed
207 80
#remove and confirm unknown marital status
dfb <- dfb[!(dfb$ED_Marital %in% c(-5, -7, -8)), ]
unique(dfb$ED_Marital)
[1] "Never been married" "Married" "Divorced" "Widowed" "Separated" "Annulled"
[7] NA
#clean race
unique(dfb$ED_RaceEthCode)
[1] "Hispanic" "Non-Hispanic Black" "Non-Hispanic White" "Non-Hispanic Other" NA
#this is fine
#clean education
#table(dfb$ED_HighestGrade)
dfb <- dfb[!(dfb$ED_HighestGrade %in% c(-5, -7, -8)), ]
table(dfb$ED_HighestGrade)
10th grade 11th grade
78 195
12th grade, no diploma 1st grade
182 6
4th grade 7th grade
1 4
8th grade 9th grade
10 46
Associate degree: Academic program Associate degree: Occupational, technical, or vocational program
285 339
Bachelor's degree: BA, AB, BS, BBA Doctoral degree: PhD, EdD
638 33
GED or equivalent High school graduate
283 1012
Master's degree: MA, MS, MEng, MEd, MBA Never attended/kindergarten only
226 3
Professional school degree: MD, DDS, DVM, JD Some college, no degree
30 1317
#this needs to be converted to fewer categories
#summarize
dfb %>%
tbl_summary(missing = "no")
| Characteristic | N = 4,6891 |
|---|---|
| ED_GenderNow | |
| FEMALE | 2,888 (62%) |
| MALE | 1,800 (38%) |
| ED_Age | 32 (25, 46) |
| ED_Marital | |
| Annulled | 6 (0.1%) |
| Divorced | 515 (11%) |
| Married | 934 (20%) |
| Never been married | 2,946 (63%) |
| Separated | 207 (4.4%) |
| Widowed | 80 (1.7%) |
| ED_RaceEthCode | |
| Hispanic | 483 (10%) |
| Non-Hispanic Black | 2,422 (52%) |
| Non-Hispanic Other | 167 (3.6%) |
| Non-Hispanic White | 1,609 (34%) |
| ED_HighestGrade | |
| 10th grade | 78 (1.7%) |
| 11th grade | 195 (4.2%) |
| 12th grade, no diploma | 182 (3.9%) |
| 1st grade | 6 (0.1%) |
| 4th grade | 1 (<0.1%) |
| 7th grade | 4 (<0.1%) |
| 8th grade | 10 (0.2%) |
| 9th grade | 46 (1.0%) |
| Associate degree: Academic program | 285 (6.1%) |
| Associate degree: Occupational, technical, or vocational program | 339 (7.2%) |
| Bachelor's degree: BA, AB, BS, BBA | 638 (14%) |
| Doctoral degree: PhD, EdD | 33 (0.7%) |
| GED or equivalent | 283 (6.0%) |
| High school graduate | 1,012 (22%) |
| Master's degree: MA, MS, MEng, MEd, MBA | 226 (4.8%) |
| Never attended/kindergarten only | 3 (<0.1%) |
| Professional school degree: MD, DDS, DVM, JD | 30 (0.6%) |
| Some college, no degree | 1,317 (28%) |
| M6_Pain_Widespread_2016 | 1,092 (40%) |
| 1 n (%); Median (IQR) | |
dfb %>%
tbl_summary(by = M6_Pain_Widespread_2016,missing = "no") %>%
add_p(workspace = 2e8) %>%
add_overall() %>%
modify_header(label ~ "**2016_Pain Outcome**") %>%
modify_spanning_header(c("stat_1", "stat_2") ~ "**Treatment Received**")
1977 observations missing `M6_Pain_Widespread_2016` have been removed. To include these observations, use `forcats::fct_na_value_to_level()` on `M6_Pain_Widespread_2016` column before passing to `tbl_summary()`.
`...` must be empty.
✖ Problematic argument:
• workspace = 2e+08There was an error in 'add_p()/add_difference()' for variable 'ED_Marital', p-value omitted:
Error in stats::fisher.test(c("Never been married", "Married", "Never been married", : FEXACT error 7(location). LDSTP=18150 is too small for this problem,
(pastp=312.06, ipn_0:=ipoin[itp=430]=2477, stp[ipn_0]=312.474).
Increase workspace or consider using 'simulate.p.value=TRUE'
There was an error in 'add_p()/add_difference()' for variable 'ED_HighestGrade', p-value omitted:
Error in stats::fisher.test(c("High school graduate", "Some college, no degree", : FEXACT error 7(location). LDSTP=18150 is too small for this problem,
(pastp=21.325, ipn_0:=ipoin[itp=366]=2819, stp[ipn_0]=14.9495).
Increase workspace or consider using 'simulate.p.value=TRUE'
| 2016_Pain Outcome | Overall, N = 2,7121 | Treatment Received | p-value2 | |
|---|---|---|---|---|
| 0, N = 1,6201 | 1, N = 1,0921 | |||
| ED_GenderNow | <0.001 | |||
| FEMALE | 1,800 (66%) | 1,035 (64%) | 765 (70%) | |
| MALE | 912 (34%) | 585 (36%) | 327 (30%) | |
| ED_Age | 35 (26, 49) | 32 (25, 46) | 40 (29, 51) | <0.001 |
| ED_Marital | ||||
| Annulled | 1 (<0.1%) | 0 (0%) | 1 (<0.1%) | |
| Divorced | 326 (12%) | 204 (13%) | 122 (11%) | |
| Married | 603 (22%) | 334 (21%) | 269 (25%) | |
| Never been married | 1,608 (59%) | 998 (62%) | 610 (56%) | |
| Separated | 130 (4.8%) | 66 (4.1%) | 64 (5.9%) | |
| Widowed | 44 (1.6%) | 18 (1.1%) | 26 (2.4%) | |
| ED_RaceEthCode | <0.001 | |||
| Hispanic | 208 (7.7%) | 130 (8.0%) | 78 (7.2%) | |
| Non-Hispanic Black | 1,471 (54%) | 815 (50%) | 656 (60%) | |
| Non-Hispanic Other | 76 (2.8%) | 56 (3.5%) | 20 (1.8%) | |
| Non-Hispanic White | 954 (35%) | 618 (38%) | 336 (31%) | |
| ED_HighestGrade | ||||
| 10th grade | 43 (1.6%) | 21 (1.3%) | 22 (2.0%) | |
| 11th grade | 114 (4.2%) | 54 (3.3%) | 60 (5.5%) | |
| 12th grade, no diploma | 97 (3.6%) | 46 (2.8%) | 51 (4.7%) | |
| 1st grade | 5 (0.2%) | 4 (0.2%) | 1 (<0.1%) | |
| 7th grade | 3 (0.1%) | 0 (0%) | 3 (0.3%) | |
| 8th grade | 6 (0.2%) | 2 (0.1%) | 4 (0.4%) | |
| 9th grade | 29 (1.1%) | 16 (1.0%) | 13 (1.2%) | |
| Associate degree: Academic program | 172 (6.3%) | 80 (4.9%) | 92 (8.4%) | |
| Associate degree: Occupational, technical, or vocational program | 200 (7.4%) | 126 (7.8%) | 74 (6.8%) | |
| Bachelor's degree: BA, AB, BS, BBA | 420 (15%) | 280 (17%) | 140 (13%) | |
| Doctoral degree: PhD, EdD | 28 (1.0%) | 19 (1.2%) | 9 (0.8%) | |
| GED or equivalent | 119 (4.4%) | 73 (4.5%) | 46 (4.2%) | |
| High school graduate | 523 (19%) | 316 (20%) | 207 (19%) | |
| Master's degree: MA, MS, MEng, MEd, MBA | 155 (5.7%) | 109 (6.7%) | 46 (4.2%) | |
| Never attended/kindergarten only | 1 (<0.1%) | 1 (<0.1%) | 0 (0%) | |
| Professional school degree: MD, DDS, DVM, JD | 19 (0.7%) | 12 (0.7%) | 7 (0.6%) | |
| Some college, no degree | 778 (29%) | 461 (28%) | 317 (29%) | |
| 1 n (%); Median (IQR) | ||||
| 2 Pearson’s Chi-squared test; Wilcoxon rank sum test | ||||
#reassign values to all be integers
df0[, 12:41] <- apply(df0[, 12:41], 2, function(col) ifelse(col == "No pain or tenderness", 0, col));
df0[, 12:41] <- apply(df0[, 12:41], 2, function(col) ifelse(col == "Severe pain or tenderness", 10, col))
#remove -8 i don't know values
#remove NA cases
df <- subset(df0, !apply(df0[, 12:41], 1, function(x) any(x == -8)))
df <- df[complete.cases(df[, c(12:41)]), ]
#increment by 1 for LCA
df[, 12:41] <- lapply(df[, 12:41], as.integer)
df[, 12:41] <- df[, 12:41] + 1
#1 = no pain # 10= severe pain
table(df[,14])
1 2 3 4 5 6 7 8 9 10 11
3518 150 115 113 96 93 61 91 79 35 95
#looking at some m6 outcomes
table(df0$M6_Pain_YN_COUNT) #number of body parts with pain
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
735 63 115 114 128 139 118 107 113 107 90 64 95 77 49 56 74 88 355
table(df0$M6_Pain_C) #number of body parts with worsening pain?
0 1 2 3 4 5 6 7 8 9 10
708 194 186 272 226 244 251 196 199 106 174
table(df0$M6_Pain_MdSv_COUNT) #number of body parts with moderate to severe pain?
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
1144 206 189 143 139 100 114 83 67 98 58 32 44 20 48 39 21 45 97
table(df0$M6_Pain_Widespread_2016) #unclear - Zinchuk 2016 questionnaire? #col 289
0 1
1630 1115
#subset to just pain features and 2016 pain classifier
df <- df[,c(12:41,289)]
dfx <- df[complete.cases(df[,]), ] #remove about 2k cases for complete cases
#sapply(df, typeof)
f2a <- cbind(ED_NowPain_Head, ED_NowPain_Neck, ED_NowPain_Jaw, ED_NowPain_LeftShoulder, ED_NowPain_RightShoulder, ED_NowPain_LeftUpperArm, ED_NowPain_RightUpperArm, ED_NowPain_LeftLowerArm, ED_NowPain_RightLowerArm, ED_NowPain_Chest, ED_NowPain_UpperBack, ED_NowPain_LowerBack, ED_NowPain_Abdomen, ED_NowPain_Genital, ED_NowPain_LeftHipUpperLeg, ED_NowPain_RightHipUpperLeg, ED_NowPain_LeftLowerLeg, ED_NowPain_RightLowerLeg, ED_NowPain_YN_COUNT, ED_Pain_YN_COUNT, ED_NowPain_MdSv_COUNT, ED_Pain_MdSv_COUNT, ED_NowPain_Widespread_1990, ED_NowPain_Widespread_2016, ED_NowPain_Widespread_7, ED_Pain_Widespread_1990, ED_Pain_Widespread_2016, ED_Pain_Widespread_7, ED_Pain_C, ED_NowPain_C)~M6_Pain_Widespread_2016
nes2a <- poLCA(f2a,dfx,nclass=2)
Conditional item response (column) probabilities,
by outcome variable, for each class (row)
$ED_NowPain_Head
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10) Pr(11)
class 1: 0.4808 0.0354 0.0658 0.0526 0.0518 0.0490 0.0533 0.0668 0.0510 0.0210 0.0725
class 2: 0.1554 0.0257 0.0877 0.0880 0.0480 0.1108 0.0662 0.1327 0.0938 0.0539 0.1377
$ED_NowPain_Neck
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10) Pr(11)
class 1: 0.4925 0.0318 0.0483 0.0481 0.0552 0.0745 0.0687 0.0669 0.0565 0.0297 0.0278
class 2: 0.0984 0.0390 0.0560 0.0791 0.0723 0.1197 0.0913 0.1067 0.1137 0.0787 0.1452
$ED_NowPain_Jaw
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10) Pr(11)
class 1: 0.8882 0.0137 0.0153 0.0181 0.0104 0.0087 0.0068 0.0087 0.0127 0.0044 0.0130
class 2: 0.6047 0.0859 0.0417 0.0402 0.0354 0.0425 0.0135 0.0424 0.0347 0.0186 0.0405
$ED_NowPain_LeftShoulder
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10) Pr(11)
class 1: 0.7334 0.0123 0.0281 0.0341 0.0311 0.0423 0.0306 0.0266 0.026 0.0154 0.0201
class 2: 0.2442 0.0508 0.0452 0.0704 0.0868 0.0992 0.0762 0.0891 0.087 0.0498 0.1011
$ED_NowPain_RightShoulder
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10) Pr(11)
class 1: 0.7890 0.0099 0.0179 0.0316 0.0185 0.0307 0.0235 0.0212 0.0280 0.0101 0.0196
class 2: 0.2796 0.0714 0.0633 0.0414 0.0613 0.1030 0.0623 0.0909 0.0723 0.0660 0.0885
$ED_NowPain_LeftUpperArm
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10) Pr(11)
class 1: 0.8315 0.0224 0.0164 0.0148 0.0266 0.0237 0.0136 0.0130 0.0142 0.0026 0.0212
class 2: 0.3483 0.0475 0.0813 0.0560 0.0622 0.0972 0.0570 0.0808 0.0684 0.0330 0.0682
$ED_NowPain_RightUpperArm
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10) Pr(11)
class 1: 0.8652 0.0142 0.0161 0.0142 0.0093 0.0248 0.0194 0.0150 0.0087 0.0062 0.0068
class 2: 0.3696 0.0508 0.0694 0.0819 0.0684 0.0631 0.0525 0.0723 0.0455 0.0487 0.0778
$ED_NowPain_LeftLowerArm
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10) Pr(11)
class 1: 0.8706 0.0118 0.0129 0.0245 0.0069 0.0180 0.0099 0.0186 0.0099 0.0045 0.0125
class 2: 0.4570 0.0735 0.0593 0.0658 0.0547 0.0673 0.0570 0.0435 0.0436 0.0226 0.0558
$ED_NowPain_RightLowerArm
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10) Pr(11)
class 1: 0.8635 0.0093 0.0181 0.0206 0.0191 0.0176 0.0101 0.0149 0.0062 0.0051 0.0155
class 2: 0.4626 0.0611 0.0692 0.0702 0.0394 0.0711 0.0443 0.0508 0.0342 0.0319 0.0653
$ED_NowPain_Chest
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10) Pr(11)
class 1: 0.8089 0.0129 0.0197 0.0313 0.0196 0.0211 0.0156 0.0245 0.0199 0.0120 0.0146
class 2: 0.3624 0.0333 0.0738 0.0699 0.0626 0.1056 0.0723 0.0636 0.0620 0.0235 0.0710
$ED_NowPain_UpperBack
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10) Pr(11)
class 1: 0.6754 0.0180 0.0374 0.0487 0.0431 0.0429 0.0194 0.0421 0.0315 0.0111 0.0303
class 2: 0.1445 0.0258 0.0389 0.0759 0.0988 0.1096 0.0939 0.1027 0.0798 0.0777 0.1524
$ED_NowPain_LowerBack
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10) Pr(11)
class 1: 0.5460 0.0234 0.0388 0.0317 0.0433 0.0589 0.0574 0.0502 0.0540 0.0328 0.0636
class 2: 0.1342 0.0334 0.0470 0.0609 0.0528 0.0734 0.0946 0.1211 0.1117 0.0768 0.1941
$ED_NowPain_Abdomen
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10) Pr(11)
class 1: 0.9033 0.0138 0.0111 0.0160 0.0069 0.0074 0.0133 0.0068 0.0081 0.0042 0.0092
class 2: 0.4437 0.0391 0.0468 0.0758 0.0485 0.0892 0.0585 0.0913 0.0290 0.0293 0.0489
$ED_NowPain_Genital
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10) Pr(11)
class 1: 0.9604 0.0006 0.0038 0.0062 0.0037 0.0086 0.0074 0.0006 0.0050 0.0012 0.0025
class 2: 0.6912 0.0270 0.0393 0.0466 0.0145 0.0291 0.0290 0.0311 0.0342 0.0270 0.0311
$ED_NowPain_LeftHipUpperLeg
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10) Pr(11)
class 1: 0.7801 0.0109 0.0203 0.0265 0.0237 0.0353 0.0273 0.0160 0.0284 0.0075 0.024
class 2: 0.3629 0.0409 0.0469 0.0583 0.0556 0.0901 0.0611 0.0872 0.0695 0.0465 0.081
$ED_NowPain_RightHipUpperLeg
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10) Pr(11)
class 1: 0.7907 0.0083 0.0272 0.0312 0.0257 0.0263 0.0194 0.0221 0.0211 0.0063 0.0217
class 2: 0.3566 0.0379 0.0644 0.0534 0.0771 0.0906 0.0515 0.0697 0.0828 0.0361 0.0798
$ED_NowPain_LeftLowerLeg
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10) Pr(11)
class 1: 0.8355 0.0051 0.0199 0.0139 0.0132 0.0273 0.0254 0.0093 0.0223 0.0057 0.0224
class 2: 0.4733 0.0433 0.0723 0.0730 0.0547 0.0589 0.0674 0.0332 0.0404 0.0246 0.0589
$ED_NowPain_RightLowerLeg
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10) Pr(11)
class 1: 0.8555 0.0076 0.0106 0.0235 0.0235 0.0156 0.0111 0.0117 0.0167 0.0080 0.0161
class 2: 0.4958 0.0463 0.0663 0.0664 0.0477 0.0505 0.0549 0.0311 0.0581 0.0342 0.0487
$ED_NowPain_YN_COUNT
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10) Pr(11) Pr(12) Pr(13) Pr(14) Pr(15) Pr(16) Pr(17) Pr(18) Pr(19)
class 1: 0.0142 0.112 0.1435 0.1423 0.1559 0.1479 0.1516 0.1296 0.0030 0.0000 0.0000 0.000 0.0000 0.0000 0.0000 0.0000 0.0000 0.000 0.000
class 2: 0.0000 0.000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0452 0.1422 0.1627 0.1223 0.084 0.0954 0.0902 0.0674 0.0332 0.0425 0.058 0.057
$ED_Pain_YN_COUNT
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10) Pr(11) Pr(12) Pr(13) Pr(14) Pr(15) Pr(16) Pr(17) Pr(18) Pr(19)
class 1: 0.5232 0.1002 0.1107 0.0644 0.0635 0.0416 0.0368 0.0158 0.0160 0.0031 0.0025 0.0026 0.0023 0.0031 0.0024 0.0031 0.0037 0.0037 0.0012
class 2: 0.3424 0.0591 0.0695 0.0600 0.0605 0.0288 0.0275 0.0368 0.0385 0.0415 0.0248 0.0174 0.0273 0.0249 0.0240 0.0187 0.0311 0.0197 0.0477
$ED_NowPain_MdSv_COUNT
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10) Pr(11) Pr(12) Pr(13) Pr(14) Pr(15) Pr(16) Pr(17) Pr(18) Pr(19)
class 1: 0.1034 0.1800 0.1954 0.1651 0.1431 0.0994 0.0772 0.0363 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
class 2: 0.0081 0.0136 0.0199 0.0509 0.0525 0.0625 0.0977 0.1009 0.1327 0.1057 0.0933 0.0736 0.0456 0.0477 0.0259 0.0187 0.0218 0.0166 0.0124
$ED_Pain_MdSv_COUNT
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10) Pr(11) Pr(12) Pr(13) Pr(14) Pr(15) Pr(16) Pr(17) Pr(18) Pr(19)
class 1: 0.6601 0.1083 0.0793 0.0506 0.0350 0.0181 0.0194 0.0086 0.0056 0.0025 0.0017 0.0014 0.0006 0.0031 0.0012 0.0043 0.0000 0.0000 0.0000
class 2: 0.5173 0.0911 0.0568 0.0458 0.0418 0.0340 0.0183 0.0239 0.0197 0.0228 0.0240 0.0153 0.0114 0.0207 0.0124 0.0073 0.0155 0.0021 0.0197
$ED_NowPain_Widespread_1990
Pr(1) Pr(2)
class 1: 0.8074 0.1926
class 2: 0.1525 0.8475
$ED_NowPain_Widespread_2016
Pr(1) Pr(2)
class 1: 0.9262 0.0738
class 2: 0.2841 0.7159
$ED_NowPain_Widespread_7
Pr(1) Pr(2)
class 1: 0.9705 0.0295
class 2: 0.0119 0.9881
$ED_Pain_Widespread_1990
Pr(1) Pr(2)
class 1: 0.9077 0.0923
class 2: 0.6478 0.3522
$ED_Pain_Widespread_2016
Pr(1) Pr(2)
class 1: 0.9541 0.0459
class 2: 0.7266 0.2734
$ED_Pain_Widespread_7
Pr(1) Pr(2)
class 1: 0.9483 0.0517
class 2: 0.6699 0.3301
$ED_Pain_C
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10) Pr(11)
class 1: 0.4871 0.0948 0.0658 0.0814 0.0510 0.0615 0.0421 0.0338 0.0340 0.0218 0.0267
class 2: 0.3138 0.0744 0.0960 0.0917 0.0732 0.0793 0.0383 0.0377 0.0819 0.0453 0.0683
$ED_NowPain_C
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10) Pr(11)
class 1: 0 0.0235 0.0496 0.0741 0.1108 0.1088 0.1482 0.1410 0.1549 0.0657 0.1234
class 2: 0 0.0042 0.0111 0.0262 0.0528 0.0759 0.1228 0.2033 0.1934 0.1222 0.1883
Estimated class population shares
0.6262 0.3738
Predicted class memberships (by modal posterior prob.)
0.6257 0.3743
=========================================================
Fit for 2 latent classes:
=========================================================
2 / 1
=========================================================
number of observations: 2581
number of estimated parameters: 558
residual degrees of freedom: 2023
maximum log-likelihood: -101856.8
AIC(2): 204829.6
BIC(2): 208097.2
X^2(2): 1.107972e+30 (Chi-square goodness of fit)
pidmat <- cbind(1,c(1:7))
exb <- exp(pidmat %*% nes2a$coeff)
matplot(c(1:7),(cbind(1,exb)/(1+rowSums(exb))),ylim=c(0,1),type="l",
main="Pain Reports as a predictor of outcome class",
xlab="Strong recovery (1) to strong chronic pain (7)",
ylab="Probability of latent class membership",lwd=2,col=1)
gghisto <- list(
theme(axis.text.x = element_text(size=18, color = "Navyblue"),
axis.text.y = element_text(face="bold",
size=14, angle= 30, color = "Navyblue"),
axis.title=element_text(size=17),
plot.title = element_text(size=17,face="bold")))
elements0 <- list(
ggtitle("Density Distribution of Pain features"),
ylab("Density"),
xlab("Self-Reported Pain Level"),
geom_density(alpha=.07)
)
df.plot0 <- melt(dfx[,1:9])
No id variables; using all as measure variables
#table(df.plot)
df.plot0 <- subset(df.plot0, value != 1)
p0 <- ggplot(aes(x = value, colour = variable, fill = variable), data = df.plot0) +
elements0 +
gghisto
df.plot1 <- melt(dfx[,10:18])
No id variables; using all as measure variables
df.plot1 <- subset(df.plot1, value != 1)
p1 <- ggplot(aes(x = value, colour = variable, fill = variable), data = df.plot1) +
elements0 +
gghisto
df.plot2 <- melt(dfx[,1:18])
No id variables; using all as measure variables
df.plot2 <- subset(df.plot2, value != 1)
p2 <- ggplot(aes(x = value, colour = variable, fill = variable), data = df.plot2) +
elements0 +
gghisto
p0;p1;p2
ggplot(na.omit(dfb), aes(x = ED_Age, fill = factor(`M6_Pain_Widespread_2016`))) +
geom_density(alpha = 0.5) +
labs(title = "Density Plot of Age by M6 Pain Response",
x = "Age",
y = "Density") +
scale_fill_manual(values = c("hotpink", "lightskyblue"))+
gghisto
ggplot(na.omit(dfx), aes(x = ED_NowPain_MdSv_COUNT, fill = factor(`M6_Pain_Widespread_2016`))) +
geom_density(alpha = 0.5) +
labs(title = "Density Plot of Now Pain x M6 Pain ",
x = "ED NOW PAIN",
y = "Density") +
scale_fill_manual(values = c("hotpink", "lightskyblue"))+
gghisto
lapply(df[, 42:76], function(col) sort(unique(col), decreasing = TRUE, na.last = TRUE))
Error in `[.data.frame`(df, , 42:76) : undefined columns selected
correlate(df0[,c(2:5,30:50)])
Non-numeric variables removed from input: `ED_NowPain_YN_COUNT`, `ED_Pain_YN_COUNT`, `ED_NowPain_MdSv_COUNT`, `ED_Pain_MdSv_COUNT`, `ED_NowPain_Widespread_1990`, `ED_NowPain_Widespread_2016`, `ED_NowPain_Widespread_7`, `ED_Pain_Widespread_1990`, `ED_Pain_Widespread_2016`, `ED_Pain_Widespread_7`, `ED_Pain_C`, and `ED_NowPain_C`Correlation computed with
• Method: 'pearson'
• Missing treated using: 'pairwise.complete.obs'
dfx$M6_Pain_Widespread_2016 <- as.factor(dfx$M6_Pain_Widespread_2016)
# Check the levels of the factor
levels(dfx$M6_Pain_Widespread_2016)
# Fit logistic regression model
logit_model <- glm(M6_Pain_Widespread_2016 ~ ., data = dfx, family = binomial)
# Summary of the model
summary(logit_model)
#filter significant features
coefficients_table <- summary(logit_model)$coefficients
# Filter coefficients table based on significance level
significant_features <- coefficients_table[coefficients_table[, "Pr(>|z|)"] < 0.01, ]
# View the significant features
print(significant_features)
Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.21531944 0.36544644 -8.798333 1.388637e-18
ED_NowPain_Jaw -0.06318017 0.02128283 -2.968599 2.991608e-03
ED_NowPain_RightShoulder -0.06374031 0.02137345 -2.982220 2.861665e-03
ED_NowPain_LowerBack 0.04231805 0.01573973 2.688614 7.174931e-03
ED_NowPain_MdSv_COUNT 0.14014139 0.04243127 3.302785 9.572967e-04
ED_Pain_MdSv_COUNT -0.08215910 0.02830421 -2.902716 3.699417e-03
ED_Pain_Widespread_2016 0.92097171 0.25563642 3.602662 3.149745e-04
ED_Pain_C 0.09180860 0.02004184 4.580847 4.630975e-06
exp(0.92097171)
[1] 2.51173
exp(0.14014139)
[1] 1.150436
The ED_Pain_Widespread_2016 feature is interesting. This
means that the likelihood of the 6 month outcome being a 1 instead of a
0 increases by a factor of approximately \(exp(0.92)\) = 2.51x. For
ED_NowPain_MdSv_COUNT, the increase is \(exp(0.14)\) = 1.15.
BIC(logit_model)
[1] 3216.773
feature selection
summary(stepwise_model)
Call:
glm(formula = M6_Pain_Widespread_2016 ~ ED_NowPain_Neck + ED_NowPain_Jaw +
ED_NowPain_RightShoulder + ED_NowPain_LeftLowerArm + ED_NowPain_LowerBack +
ED_NowPain_Abdomen + ED_NowPain_LeftHipUpperLeg + ED_NowPain_RightHipUpperLeg +
ED_Pain_YN_COUNT + ED_NowPain_MdSv_COUNT + ED_Pain_MdSv_COUNT +
ED_NowPain_Widespread_1990 + ED_NowPain_Widespread_7 + ED_Pain_Widespread_1990 +
ED_Pain_Widespread_2016 + ED_Pain_C + ED_NowPain_C, family = binomial,
data = dfx)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.35534 0.32506 -10.322 < 2e-16 ***
ED_NowPain_Neck 0.02377 0.01636 1.453 0.146164
ED_NowPain_Jaw -0.06458 0.02037 -3.171 0.001522 **
ED_NowPain_RightShoulder -0.06999 0.01790 -3.910 9.24e-05 ***
ED_NowPain_LeftLowerArm -0.03231 0.01979 -1.633 0.102474
ED_NowPain_LowerBack 0.04420 0.01442 3.065 0.002180 **
ED_NowPain_Abdomen -0.03991 0.02007 -1.988 0.046769 *
ED_NowPain_LeftHipUpperLeg -0.03071 0.01688 -1.820 0.068826 .
ED_NowPain_RightHipUpperLeg 0.04436 0.01677 2.645 0.008161 **
ED_Pain_YN_COUNT 0.04808 0.02770 1.736 0.082528 .
ED_NowPain_MdSv_COUNT 0.16557 0.03029 5.467 4.58e-08 ***
ED_Pain_MdSv_COUNT -0.08583 0.02793 -3.073 0.002120 **
ED_NowPain_Widespread_1990 -0.31922 0.12322 -2.591 0.009581 **
ED_NowPain_Widespread_7 0.30888 0.13990 2.208 0.027252 *
ED_Pain_Widespread_1990 0.39927 0.20187 1.978 0.047943 *
ED_Pain_Widespread_2016 0.89649 0.24962 3.591 0.000329 ***
ED_Pain_C 0.09186 0.01973 4.657 3.21e-06 ***
ED_NowPain_C 0.05162 0.02381 2.168 0.030192 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 3495.2 on 2580 degrees of freedom
Residual deviance: 2978.7 on 2563 degrees of freedom
AIC: 3014.7
Number of Fisher Scoring iterations: 4