select puf.*, cpttab.cpt
from nsqip.puf
inner join
(select caseid, ANY_VALUE(nproc) nproc, ANY_VALUE(cpt) cpt
from nsqip.puf_cpt
group by caseid
having count(1) = 1
and cpt in ('49650', '49505', '49525') and nproc = 1
) as cpttab
on puf.caseid = cpttab.caseidThe NSQIP SQL database was queried as above. This returned all patients with CPT codes of 49650, 49505, and 49525 without concurrent or other procedures documented.
The above code included only patients with the following criteria:
The above code categorizes patients into a Dindo-Clavien class according to the following schema:
dindo_1 <- c("supinfec","wndinfd","dehis","renainsf")
dindo_2 <- c("orgspcssi","oupneumo","urninfec","pulembol","othbleed","othdvt","othsysep","othcdiff")
dindo_3 <- c("returnor")
dindo_4 <- c("reintub","failwean","oprenafl","cnscva","cdarrest","cdmi","othseshock","cnscoma","neurodef")
dindo_5 <- c("dopertod")Propensity-score matching is used for pre-processing.
The above code matches patients with and without COPD according to sex, race, age, diabetes, smoking status, dypsnea, functional status, ascites, CHF, hypertensive medicaiton use, renal failure, dialysis, steroid use, bleeding disorders, > 10% weight loss within 6 months of surgery, and laparoscopic vs. open technique. It utilizes a 1:1 nearest neighbor propensity-score match. The results of the match can be seen below:
summary(df_match)
#>
#> Call:
#> MatchIt::matchit(formula = hxcopd ~ sex + race + age + diabetes +
#> smoke + dyspnea + fnstatus2 + ascites + hxchf + hypermed +
#> renafail + dialysis + steroid + bleeddis + wtloss + lap,
#> data = df_m)
#>
#> Summary of balance for all data:
#> Means Treated Means Control SD Control
#> distance 0.1445 0.0265 0.0507
#> sexFALSE 0.1315 0.0861 0.2805
#> sexTRUE 0.8685 0.9139 0.2805
#> raceAsian 0.0202 0.0314 0.1743
#> raceBlack 0.0894 0.0957 0.2941
#> raceNative Hawaiian or Pacific islander 0.0024 0.0031 0.0553
#> raceWhite 0.8836 0.8641 0.3427
#> age 69.1024 57.4566 16.5378
#> diabetesTRUE 0.1191 0.0744 0.2624
#> smokeTRUE 0.3816 0.1765 0.3813
#> dyspneaTRUE 0.3065 0.0232 0.1507
#> fnstatus2Partially dependent 0.0162 0.0039 0.0621
#> fnstatus2Totally dependent 0.0008 0.0003 0.0181
#> ascitesTRUE 0.0034 0.0014 0.0368
#> hxchfTRUE 0.0278 0.0030 0.0544
#> hypermedTRUE 0.6346 0.3695 0.4827
#> renafailTRUE 0.0021 0.0005 0.0222
#> dialysisTRUE 0.0143 0.0053 0.0727
#> steroidTRUE 0.0774 0.0162 0.1264
#> bleeddisTRUE 0.0546 0.0202 0.1408
#> wtlossTRUE 0.0093 0.0020 0.0450
#> lapTRUE 0.2262 0.3452 0.4754
#> Mean Diff eQQ Med eQQ Mean eQQ Max
#> distance 0.1181 0.0469 0.1179 0.5011
#> sexFALSE 0.0455 0.0000 0.0453 1.0000
#> sexTRUE -0.0455 0.0000 0.0453 1.0000
#> raceAsian -0.0112 0.0000 0.0114 1.0000
#> raceBlack -0.0063 0.0000 0.0064 1.0000
#> raceNative Hawaiian or Pacific islander -0.0007 0.0000 0.0008 1.0000
#> raceWhite 0.0195 0.0000 0.0196 1.0000
#> age 11.6457 11.0000 11.6465 25.0000
#> diabetesTRUE 0.0447 0.0000 0.0446 1.0000
#> smokeTRUE 0.2051 0.0000 0.2050 1.0000
#> dyspneaTRUE 0.2833 0.0000 0.2832 1.0000
#> fnstatus2Partially dependent 0.0123 0.0000 0.0122 1.0000
#> fnstatus2Totally dependent 0.0005 0.0000 0.0003 1.0000
#> ascitesTRUE 0.0021 0.0000 0.0019 1.0000
#> hxchfTRUE 0.0249 0.0000 0.0247 1.0000
#> hypermedTRUE 0.2651 0.0000 0.2649 1.0000
#> renafailTRUE 0.0016 0.0000 0.0016 1.0000
#> dialysisTRUE 0.0090 0.0000 0.0088 1.0000
#> steroidTRUE 0.0612 0.0000 0.0610 1.0000
#> bleeddisTRUE 0.0344 0.0000 0.0342 1.0000
#> wtlossTRUE 0.0073 0.0000 0.0072 1.0000
#> lapTRUE -0.1190 0.0000 0.1191 1.0000
#>
#>
#> Summary of balance for matched data:
#> Means Treated Means Control SD Control
#> distance 0.1445 0.1439 0.1712
#> sexFALSE 0.1315 0.1246 0.3303
#> sexTRUE 0.8685 0.8754 0.3303
#> raceAsian 0.0202 0.0223 0.1476
#> raceBlack 0.0894 0.0957 0.2943
#> raceNative Hawaiian or Pacific islander 0.0024 0.0037 0.0608
#> raceWhite 0.8836 0.8735 0.3324
#> age 69.1024 69.7232 11.3670
#> diabetesTRUE 0.1191 0.1254 0.3313
#> smokeTRUE 0.3816 0.3726 0.4836
#> dyspneaTRUE 0.3065 0.3042 0.4601
#> fnstatus2Partially dependent 0.0162 0.0135 0.1155
#> fnstatus2Totally dependent 0.0008 0.0005 0.0230
#> ascitesTRUE 0.0034 0.0024 0.0488
#> hxchfTRUE 0.0278 0.0233 0.1510
#> hypermedTRUE 0.6346 0.6484 0.4775
#> renafailTRUE 0.0021 0.0013 0.0364
#> dialysisTRUE 0.0143 0.0133 0.1144
#> steroidTRUE 0.0774 0.0655 0.2474
#> bleeddisTRUE 0.0546 0.0607 0.2389
#> wtlossTRUE 0.0093 0.0069 0.0828
#> lapTRUE 0.2262 0.2190 0.4137
#> Mean Diff eQQ Med eQQ Mean eQQ Max
#> distance 0.0006 0 0.0007 0.0652
#> sexFALSE 0.0069 0 0.0069 1.0000
#> sexTRUE -0.0069 0 0.0069 1.0000
#> raceAsian -0.0021 0 0.0021 1.0000
#> raceBlack -0.0064 0 0.0064 1.0000
#> raceNative Hawaiian or Pacific islander -0.0013 0 0.0013 1.0000
#> raceWhite 0.0101 0 0.0101 1.0000
#> age -0.6208 1 0.6500 3.0000
#> diabetesTRUE -0.0064 0 0.0064 1.0000
#> smokeTRUE 0.0090 0 0.0090 1.0000
#> dyspneaTRUE 0.0024 0 0.0024 1.0000
#> fnstatus2Partially dependent 0.0027 0 0.0027 1.0000
#> fnstatus2Totally dependent 0.0003 0 0.0003 1.0000
#> ascitesTRUE 0.0011 0 0.0011 1.0000
#> hxchfTRUE 0.0045 0 0.0045 1.0000
#> hypermedTRUE -0.0138 0 0.0138 1.0000
#> renafailTRUE 0.0008 0 0.0008 1.0000
#> dialysisTRUE 0.0011 0 0.0011 1.0000
#> steroidTRUE 0.0119 0 0.0119 1.0000
#> bleeddisTRUE -0.0061 0 0.0061 1.0000
#> wtlossTRUE 0.0024 0 0.0024 1.0000
#> lapTRUE 0.0072 0 0.0072 1.0000
#>
#> Percent Balance Improvement:
#> Mean Diff. eQQ Med eQQ Mean eQQ Max
#> distance 99.4751 100.0000 99.4432 86.989
#> sexFALSE 84.8342 0.0000 84.7953 0.000
#> sexTRUE 84.8342 0.0000 84.7953 0.000
#> raceAsian 81.0867 0.0000 81.3953 0.000
#> raceBlack -1.0740 0.0000 0.0000 0.000
#> raceNative Hawaiian or Pacific islander -94.0971 0.0000 -66.6667 0.000
#> raceWhite 48.4096 0.0000 48.6486 0.000
#> age 94.6694 90.9091 94.4193 88.000
#> diabetesTRUE 85.7552 0.0000 85.7143 0.000
#> smokeTRUE 95.6037 0.0000 95.6016 0.000
#> dyspneaTRUE 99.1576 0.0000 99.1573 0.000
#> fnstatus2Partially dependent 78.4583 0.0000 78.2609 0.000
#> fnstatus2Totally dependent 43.2438 0.0000 0.0000 0.000
#> ascitesTRUE 49.3217 0.0000 42.8571 0.000
#> hxchfTRUE 81.8754 0.0000 81.7204 0.000
#> hypermedTRUE 94.7979 0.0000 94.7948 0.000
#> renafailTRUE 51.1630 0.0000 50.0000 0.000
#> dialysisTRUE 88.2156 0.0000 87.8788 0.000
#> steroidTRUE 80.4980 0.0000 80.4348 0.000
#> bleeddisTRUE 82.2672 0.0000 82.1705 0.000
#> wtlossTRUE 67.0991 0.0000 66.6667 0.000
#> lapTRUE 93.9816 0.0000 93.9866 0.000
#>
#> Sample sizes:
#> Control Treated
#> All 121834 3771
#> Matched 3771 3771
#> Unmatched 118063 0
#> Discarded 0 0If necessary, I can produce a matched pair matrix with df_match$match.matrix for matched pair analysis.
First, I will fit my unadjusted model. I use ordinal logistic regression (provided by MASS:polr package) because I am regressing on to an ordinal outcome variable (dindo) with levels 0 through 5.
unadjfit <- MASS::polr(dindo ~ hxcopd, data = df_m)
unadjfit.or <- exp(cbind(coef(unadjfit), t(confint(unadjfit))))
#> Waiting for profiling to be done...
#>
#> Re-fitting to get HessianThe model is summarized:
summary(unadjfit)
#>
#> Re-fitting to get Hessian
#> Call:
#> MASS::polr(formula = dindo ~ hxcopd, data = df_m)
#>
#> Coefficients:
#> Value Std. Error t value
#> hxcopdTRUE 0.4003 0.1323 3.027
#>
#> Intercepts:
#> Value Std. Error t value
#> 0|1 3.6128 0.1018 35.4742
#> 1|2 3.7429 0.1045 35.8053
#> 2|3 4.2549 0.1181 36.0240
#> 3|4 4.9046 0.1443 33.9888
#> 4|5 6.0542 0.2276 26.6017
#>
#> Residual Deviance: 2887.225
#> AIC: 2899.225polr outputs the ordinal logistic regression parameteized as:
\[logit (P(Y \le j)) = \beta_{j0} – \eta_{1}x_1 – \cdots – \eta_{p} x_p\]
Due to the parallel lines assumption, even though we have six categories, the coefficient of COPD (hxcopd) stays the same across the five categories. The two equations for hxcopd = 1 and hxcopd = 0 are
\[ \begin{eqnarray} logit (P(Y \le j | x_1=1) & = & \beta_{j0} – \eta_{1} \\ logit (P(Y \le j | x_1=0) & = & \beta_{j0} \end{eqnarray} \]
Therefore:
\[logit (P(Y \le j)|x_1=1) -logit (P(Y \le j)|x_1=0) = – \eta_{1}.\]
Normally, I could get the odds ratio by exponentiating both sides of this equation and using \(log(b)-log(a) = log(b/a)\):
\[\frac{P(Y \le j |x_1=1)}{P(Y>j|x_1=1)} / \frac{P(Y \le j |x_1=0)}{P(Y>j|x_1=0)} = exp( -\eta_{1}).\]
Which by the proportional odds assumption can be simplified:
\[\frac{P(Y \le j |x_1=1)}{P(Y>j|x_1=1)} = p_1 / (1-p_1) \]
\[\frac{P(Y \le j |x_1=0)}{P(Y>j|x_1=0)} = p_0 / (1-p_0)\]
The odds ratio is defined as:
\[\frac{p_1 / (1-p_1) }{p_0 / (1-p_0)} = exp( -\eta_{1}).\]
However, the coefficient reported by summary(unadjfit) is actually \(\eta\), not \(-\eta\). So I will have to flip the odds ratio:
Since \(exp(-\eta_{1}) = \frac{1}{exp(\eta_{1})}\),
\[exp(\eta_{1}) = \frac{p_0 / (1-p_0) }{p_1 / (1-p_1)}.\]
From the output, \(\hat{\eta}_1=0.4003126\), the odds ratio \(exp(\hat{\eta}_1)=1.4922911\) is actually \(\frac{p_0 / (1-p_0) }{p_1 / (1-p_1)}\).
This can be interpreted as “people without COPD have 1.4922911 higher odds of being in a category \(\leq J\) vs. \(>J\) when compared to patients with COPD” where \(J = \{0, 1,2,3,4,5\}\).
To make this more interpretable, you can switch around some of the signs:
\[ \begin{eqnarray} exp(-\eta_{1}) & = & \frac{p_1 / (1-p_1)}{p_0/(1-p_0)} \\ & = & \frac{p_1 (1-p_0)}{p_0(1-p_1)} \\ & = & \frac{(1-p_0)/p_0}{(1-p_1)/p_1} \\ & = & \frac{P (Y >j | x=0)/P(Y \le j|x=0)}{P(Y > j | x=1)/P(Y \le j | x=1)}. \end{eqnarray} \]
Since \(exp(-\eta_{1}) = \frac{1}{exp(\eta_{1})}\),
\[\frac{P (Y >j | x=1)/P(Y \le j|x=1)}{P(Y > j | x=0)/P(Y \le j | x=0)} = exp(\eta).\]
Instead of interpreting the odds of being in category \(\leq J\), we can interpret the odds of being in category \(>J\): “people with COPD have 1.4922911 times the odds of being in category \(>J\) compared to people without COPD.”
The 95% confidence interval can be easily calculated:
exp(confint(unadjfit))
#> Waiting for profiling to be done...
#>
#> Re-fitting to get Hessian
#> 2.5 % 97.5 %
#> 1.153489 1.938509In order to fit the adjusted model, I start by suppying all the variables include in the match:
adjfit <- MASS::polr(dindo ~ hxcopd + sex + race + age + diabetes + smoke + dyspnea + fnstatus2 + ascites + hxchf + hypermed + renafail + dialysis + steroid + bleeddis + wtloss + lap, data = df_m)
#> Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(adjfit)
#>
#> Re-fitting to get Hessian
#> Call:
#> MASS::polr(formula = dindo ~ hxcopd + sex + race + age + diabetes +
#> smoke + dyspnea + fnstatus2 + ascites + hxchf + hypermed +
#> renafail + dialysis + steroid + bleeddis + wtloss + lap,
#> data = df_m)
#>
#> Coefficients:
#> Value Std. Error t value
#> hxcopdTRUE 0.42863 1.342e-01 3.193e+00
#> sexTRUE -0.06410 1.940e-01 -3.303e-01
#> raceAsian -1.00369 1.245e+00 -8.063e-01
#> raceBlack 0.22738 1.036e+00 2.195e-01
#> raceNative Hawaiian or Pacific islander -10.96370 6.278e-05 -1.746e+05
#> raceWhite -0.10687 1.021e+00 -1.046e-01
#> age 0.02040 6.960e-03 2.931e+00
#> diabetesTRUE 0.57178 1.670e-01 3.424e+00
#> smokeTRUE 0.16700 1.515e-01 1.103e+00
#> dyspneaTRUE 0.17248 1.399e-01 1.233e+00
#> fnstatus2Partially dependent 0.78438 3.369e-01 2.328e+00
#> fnstatus2Totally dependent -14.00459 8.157e-09 -1.717e+09
#> ascitesTRUE 1.82496 5.805e-01 3.144e+00
#> hxchfTRUE 0.99521 2.560e-01 3.888e+00
#> hypermedTRUE 0.04481 1.508e-01 2.972e-01
#> renafailTRUE 0.35707 1.070e+00 3.338e-01
#> dialysisTRUE 0.98040 3.492e-01 2.807e+00
#> steroidTRUE 0.07941 2.370e-01 3.350e-01
#> bleeddisTRUE 0.80356 2.006e-01 4.007e+00
#> wtlossTRUE 0.97155 4.148e-01 2.342e+00
#> lapTRUE 0.24056 1.564e-01 1.538e+00
#>
#> Intercepts:
#> Value Std. Error t value
#> 0|1 5.421500e+00 1.156200e+00 4.689200e+00
#> 1|2 5.554000e+00 1.156400e+00 4.802700e+00
#> 2|3 6.074700e+00 1.157800e+00 5.246600e+00
#> 3|4 6.732100e+00 1.160900e+00 5.799100e+00
#> 4|5 7.888900e+00 1.174300e+00 6.718100e+00
#>
#> Residual Deviance: 2786.892
#> AIC: 2838.892However, note the warning. This is because the level of fnstatus2 called Totally dependent has only 5 records. Each of these records ends up with the same Dindo-Clavien classification of 0. This leads to complete separation of the variable fnstatus2Totally dependent with regards to the outcome variable dindo. The maximum likelihood estimate for fnstatus2Totally dependent does not exist. Another hint is the large coefficient of -14.0045892 and the even larger standard error.
We can resolve this complete separation by collapsing fnstatus2 into two levels (“Independent” and “Dependent”):
df_m$fnstatus2 <- factor(df_m$fnstatus2)
levels(df_m$fnstatus2) <- list(Independent = "Independent", "Dependent" = c("Partially dependent", "Totally dependent"))
adjfit <- MASS::polr(dindo ~ hxcopd + sex + race + age + diabetes + smoke + dyspnea + fnstatus2 + ascites + hxchf + hypermed + renafail + dialysis + steroid + bleeddis + wtloss + lap, data = df_m)
summary(adjfit)
#>
#> Re-fitting to get Hessian
#> Call:
#> MASS::polr(formula = dindo ~ hxcopd + sex + race + age + diabetes +
#> smoke + dyspnea + fnstatus2 + ascites + hxchf + hypermed +
#> renafail + dialysis + steroid + bleeddis + wtloss + lap,
#> data = df_m)
#>
#> Coefficients:
#> Value Std. Error t value
#> hxcopdTRUE 0.42908 1.342e-01 3.197e+00
#> sexTRUE -0.06567 1.940e-01 -3.385e-01
#> raceAsian -1.01655 1.245e+00 -8.166e-01
#> raceBlack 0.22811 1.036e+00 2.202e-01
#> raceNative Hawaiian or Pacific islander -10.71234 8.076e-05 -1.326e+05
#> raceWhite -0.10699 1.021e+00 -1.048e-01
#> age 0.02040 6.956e-03 2.932e+00
#> diabetesTRUE 0.57169 1.670e-01 3.424e+00
#> smokeTRUE 0.16755 1.515e-01 1.106e+00
#> dyspneaTRUE 0.17326 1.398e-01 1.239e+00
#> fnstatus2Dependent 0.75604 3.359e-01 2.251e+00
#> ascitesTRUE 1.82831 5.803e-01 3.150e+00
#> hxchfTRUE 0.99739 2.559e-01 3.897e+00
#> hypermedTRUE 0.04599 1.508e-01 3.050e-01
#> renafailTRUE 0.35625 1.070e+00 3.330e-01
#> dialysisTRUE 0.98028 3.492e-01 2.807e+00
#> steroidTRUE 0.08093 2.370e-01 3.415e-01
#> bleeddisTRUE 0.80512 2.005e-01 4.015e+00
#> wtlossTRUE 0.97516 4.147e-01 2.352e+00
#> lapTRUE 0.24020 1.564e-01 1.536e+00
#>
#> Intercepts:
#> Value Std. Error t value
#> 0|1 5.4216 1.1561 4.6896
#> 1|2 5.5541 1.1564 4.8031
#> 2|3 6.0748 1.1578 5.2470
#> 3|4 6.7322 1.1608 5.7995
#> 4|5 7.8889 1.1742 6.7186
#>
#> Residual Deviance: 2787.425
#> AIC: 2837.425In order to produce the best fit, I utilize both backwards and forwards step-wise model selection by AIC:
adjfit.step <- MASS::stepAIC(adjfit, direction = "both", trace = FALSE)
summary(adjfit.step)
#>
#> Re-fitting to get Hessian
#> Call:
#> MASS::polr(formula = dindo ~ hxcopd + age + diabetes + fnstatus2 +
#> ascites + hxchf + dialysis + bleeddis + wtloss, data = df_m)
#>
#> Coefficients:
#> Value Std. Error t value
#> hxcopdTRUE 0.42097 0.133888 3.144
#> age 0.01624 0.006175 2.631
#> diabetesTRUE 0.57029 0.164486 3.467
#> fnstatus2Dependent 0.77247 0.336009 2.299
#> ascitesTRUE 1.73700 0.580973 2.990
#> hxchfTRUE 1.04844 0.250773 4.181
#> dialysisTRUE 1.00015 0.342407 2.921
#> bleeddisTRUE 0.81298 0.198346 4.099
#> wtlossTRUE 0.97523 0.415840 2.345
#>
#> Intercepts:
#> Value Std. Error t value
#> 0|1 5.0570 0.4572 11.0611
#> 1|2 5.1893 0.4578 11.3346
#> 2|3 5.7091 0.4612 12.3777
#> 3|4 6.3653 0.4687 13.5808
#> 4|5 7.5204 0.5007 15.0188
#>
#> Residual Deviance: 2799.438
#> AIC: 2827.438Exponentiating the coefficients to obtain odds ratios:
broom::tidy(adjfit.step, conf.int = TRUE, p.values = TRUE, exponentiate = TRUE)
#>
#> Re-fitting to get Hessian
#> # A tibble: 14 x 8
#> term estimate std.error statistic conf.low conf.high p.value coef.type
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 hxcopdTRUE 1.52 0.134 3.14 1.17 1.99 1.51e-3 coeffici~
#> 2 age 1.02 0.00618 2.63 1.00 1.03 7.67e-3 coeffici~
#> 3 diabetesT~ 1.77 0.164 3.47 1.27 2.42 9.76e-4 coeffici~
#> 4 fnstatus2~ 2.17 0.336 2.30 1.06 4.01 3.49e-2 coeffici~
#> 5 ascitesTR~ 5.68 0.581 2.99 1.57 16.1 1.14e-2 coeffici~
#> 6 hxchfTRUE 2.85 0.251 4.18 1.70 4.56 1.72e-4 coeffici~
#> 7 dialysisT~ 2.72 0.342 2.92 1.32 5.09 8.66e-3 coeffici~
#> 8 bleeddisT~ 2.25 0.198 4.10 1.50 3.28 1.60e-4 coeffici~
#> 9 wtlossTRUE 2.65 0.416 2.35 1.09 5.66 3.29e-2 coeffici~
#> 10 0|1 157. 0.457 11.1 NA NA NA scale
#> 11 1|2 179. 0.458 11.3 NA NA NA scale
#> 12 2|3 302. 0.461 12.4 NA NA NA scale
#> 13 3|4 581. 0.469 13.6 NA NA NA scale
#> 14 4|5 1845. 0.501 15.0 NA NA NA scaleThe adjusted odds ratio of 1.5234386 is very similar to the unadjusted odds ratio of 1.4922911. This is because we had very good matching which essentially negates the effect of our covariates. Nonetheless, the logistic regression is more robust and has greater power than a chi-square test. Not to mention a chi-square test does not tell us the size of the effect.
I can test the parallel regression assumption with a Brant test:
brant::brant(adjfit.step)
#> ----------------------------------------------------
#> Test for X2 df probability
#> ----------------------------------------------------
#> Omnibus -756.67 36 1
#> hxcopdTRUE 0.44 4 0.98
#> age 6.25 4 0.18
#> diabetesTRUE 1.7 4 0.79
#> fnstatus2Dependent 15.68 4 0
#> ascitesTRUE 127.76 4 0
#> hxchfTRUE 1.52 4 0.82
#> dialysisTRUE 19.19 4 0
#> bleeddisTRUE 2.21 4 0.7
#> wtlossTRUE 40.17 4 0
#> ----------------------------------------------------
#>
#> H0: Parallel Regression Assumption holds
#> Warning in brant::brant(adjfit.step): 1402 combinations in table(dv,ivs) do not
#> occur. Because of that, the test results might be invalid.The omnibus test is non-significant but there are some significant covariates. I’m still trying to figure out how to interpret whether or not this means the assumption holds.
In addition, the Hosmer and Lemeshow test is non-significant, which suggests the assumption holds:
generalhoslem::logitgof(df_m$dindo, fitted(adjfit.step), ord = TRUE)
#> Warning in generalhoslem::logitgof(df_m$dindo, fitted(adjfit.step), ord =
#> TRUE): At least one cell in the expected frequencies table is < 1. Chi-square
#> approximation may be incorrect.
#>
#> Hosmer and Lemeshow test (ordinal model)
#>
#> data: df_m$dindo, fitted(adjfit.step)
#> X-squared = 32.485, df = 44, p-value = 0.9001Alternatively, many will recommend against the use of step-wise model selection due to its potential to produce significant bias. Instead, they advocate for covariate selection based on clinical expertise and insight. For that reason, I would build a model with covariates that I really thought were strongly related to outcome. I would argue sex, race, dyspnea, weight loss, and steroid use are probably not strongly related with the outcomes we are examining. I realize you could make the argument that any of these are in some way, but I’m trying to pick out the ones I think are strongly related and also trying to minimize the number of covariates in our model.
adjfit <- MASS::polr(dindo ~ hxcopd + age + diabetes + smoke + fnstatus2 + ascites + hxchf + renafail + dialysis + steroid + bleeddis + lap, data = df_m)
broom::tidy(adjfit, conf.int = TRUE, p.values = TRUE, exponentiate = TRUE)
#>
#> Re-fitting to get Hessian
#> # A tibble: 17 x 8
#> term estimate std.error statistic conf.low conf.high p.value coef.type
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 hxcopdTR~ 1.51 0.134 3.10 1.17 1.97 1.79e-3 coeffici~
#> 2 age 1.02 0.00683 3.06 1.01 1.04 1.89e-3 coeffici~
#> 3 diabetes~ 1.77 0.165 3.48 1.27 2.43 9.40e-4 coeffici~
#> 4 smokeTRUE 1.21 0.151 1.25 0.896 1.62 2.14e-1 coeffici~
#> 5 fnstatus~ 2.29 0.335 2.48 1.13 4.23 2.39e-2 coeffici~
#> 6 ascitesT~ 6.65 0.571 3.32 1.87 18.5 6.05e-3 coeffici~
#> 7 hxchfTRUE 3.03 0.248 4.47 1.82 4.82 6.62e-5 coeffici~
#> 8 renafail~ 1.52 1.06 0.393 0.0814 8.23 7.10e-1 coeffici~
#> 9 dialysis~ 3.05 0.340 3.28 1.49 5.70 3.57e-3 coeffici~
#> 10 steroidT~ 1.11 0.236 0.428 0.677 1.72 6.73e-1 coeffici~
#> 11 bleeddis~ 2.30 0.198 4.19 1.53 3.34 1.16e-4 coeffici~
#> 12 lapTRUE 1.23 0.156 1.33 0.900 1.66 1.91e-1 coeffici~
#> 13 0|1 245. 0.534 10.3 NA NA NA scale
#> 14 1|2 280. 0.534 10.5 NA NA NA scale
#> 15 2|3 470. 0.537 11.4 NA NA NA scale
#> 16 3|4 907. 0.544 12.5 NA NA NA scale
#> 17 4|5 2883. 0.572 13.9 NA NA NA scalelap, which is a variable that indicates if a patient underwent laparoscopic or open repair, is not significantly associated with any difference in the primary endpoint.
What else do we need here?