modeling

library(nsqipHerniaCOPD)

Creating Data Sets

select puf.*, cpttab.cpt
from nsqip.puf
inner join
(select caseid, ANY_VALUE(nproc) nproc, ANY_VALUE(cpt) cpt
from nsqip.puf_cpt
group by caseid
having count(1) = 1
and cpt in ('49650', '49505', '49525') and nproc = 1
) as cpttab
on puf.caseid = cpttab.caseid

The NSQIP SQL database was queried as above. This returned all patients with CPT codes of 49650, 49505, and 49525 without concurrent or other procedures documented.

df <- include(df, age >= 18, surgspec == "General surgery", !emergncy, electsurg, transt == "Admitted from home", !ventilat, !pnapatos, !prsepis, asaclas < 5)

The above code included only patients with the following criteria:

df$dindo <- nsqipr::dindo(df)

The above code categorizes patients into a Dindo-Clavien class according to the following schema:

dindo_1 <- c("supinfec","wndinfd","dehis","renainsf")
dindo_2 <- c("orgspcssi","oupneumo","urninfec","pulembol","othbleed","othdvt","othsysep","othcdiff")
dindo_3 <- c("returnor")
dindo_4 <- c("reintub","failwean","oprenafl","cnscva","cdarrest","cdmi","othseshock","cnscoma","neurodef")
dindo_5 <- c("dopertod")

Matching

Propensity-score matching is used for pre-processing.

df_match <- MatchIt::matchit(hxcopd ~ sex + race + age + diabetes + smoke + dyspnea + fnstatus2 + ascites + hxchf + hypermed + renafail + dialysis + steroid + bleeddis + wtloss + lap, data = df_m)

The above code matches patients with and without COPD according to sex, race, age, diabetes, smoking status, dypsnea, functional status, ascites, CHF, hypertensive medicaiton use, renal failure, dialysis, steroid use, bleeding disorders, > 10% weight loss within 6 months of surgery, and laparoscopic vs. open technique. It utilizes a 1:1 nearest neighbor propensity-score match. The results of the match can be seen below:

summary(df_match)
#> 
#> Call:
#> MatchIt::matchit(formula = hxcopd ~ sex + race + age + diabetes + 
#>     smoke + dyspnea + fnstatus2 + ascites + hxchf + hypermed + 
#>     renafail + dialysis + steroid + bleeddis + wtloss + lap, 
#>     data = df_m)
#> 
#> Summary of balance for all data:
#>                                         Means Treated Means Control SD Control
#> distance                                       0.1445        0.0265     0.0507
#> sexFALSE                                       0.1315        0.0861     0.2805
#> sexTRUE                                        0.8685        0.9139     0.2805
#> raceAsian                                      0.0202        0.0314     0.1743
#> raceBlack                                      0.0894        0.0957     0.2941
#> raceNative Hawaiian or Pacific islander        0.0024        0.0031     0.0553
#> raceWhite                                      0.8836        0.8641     0.3427
#> age                                           69.1024       57.4566    16.5378
#> diabetesTRUE                                   0.1191        0.0744     0.2624
#> smokeTRUE                                      0.3816        0.1765     0.3813
#> dyspneaTRUE                                    0.3065        0.0232     0.1507
#> fnstatus2Partially dependent                   0.0162        0.0039     0.0621
#> fnstatus2Totally dependent                     0.0008        0.0003     0.0181
#> ascitesTRUE                                    0.0034        0.0014     0.0368
#> hxchfTRUE                                      0.0278        0.0030     0.0544
#> hypermedTRUE                                   0.6346        0.3695     0.4827
#> renafailTRUE                                   0.0021        0.0005     0.0222
#> dialysisTRUE                                   0.0143        0.0053     0.0727
#> steroidTRUE                                    0.0774        0.0162     0.1264
#> bleeddisTRUE                                   0.0546        0.0202     0.1408
#> wtlossTRUE                                     0.0093        0.0020     0.0450
#> lapTRUE                                        0.2262        0.3452     0.4754
#>                                         Mean Diff eQQ Med eQQ Mean eQQ Max
#> distance                                   0.1181  0.0469   0.1179  0.5011
#> sexFALSE                                   0.0455  0.0000   0.0453  1.0000
#> sexTRUE                                   -0.0455  0.0000   0.0453  1.0000
#> raceAsian                                 -0.0112  0.0000   0.0114  1.0000
#> raceBlack                                 -0.0063  0.0000   0.0064  1.0000
#> raceNative Hawaiian or Pacific islander   -0.0007  0.0000   0.0008  1.0000
#> raceWhite                                  0.0195  0.0000   0.0196  1.0000
#> age                                       11.6457 11.0000  11.6465 25.0000
#> diabetesTRUE                               0.0447  0.0000   0.0446  1.0000
#> smokeTRUE                                  0.2051  0.0000   0.2050  1.0000
#> dyspneaTRUE                                0.2833  0.0000   0.2832  1.0000
#> fnstatus2Partially dependent               0.0123  0.0000   0.0122  1.0000
#> fnstatus2Totally dependent                 0.0005  0.0000   0.0003  1.0000
#> ascitesTRUE                                0.0021  0.0000   0.0019  1.0000
#> hxchfTRUE                                  0.0249  0.0000   0.0247  1.0000
#> hypermedTRUE                               0.2651  0.0000   0.2649  1.0000
#> renafailTRUE                               0.0016  0.0000   0.0016  1.0000
#> dialysisTRUE                               0.0090  0.0000   0.0088  1.0000
#> steroidTRUE                                0.0612  0.0000   0.0610  1.0000
#> bleeddisTRUE                               0.0344  0.0000   0.0342  1.0000
#> wtlossTRUE                                 0.0073  0.0000   0.0072  1.0000
#> lapTRUE                                   -0.1190  0.0000   0.1191  1.0000
#> 
#> 
#> Summary of balance for matched data:
#>                                         Means Treated Means Control SD Control
#> distance                                       0.1445        0.1439     0.1712
#> sexFALSE                                       0.1315        0.1246     0.3303
#> sexTRUE                                        0.8685        0.8754     0.3303
#> raceAsian                                      0.0202        0.0223     0.1476
#> raceBlack                                      0.0894        0.0957     0.2943
#> raceNative Hawaiian or Pacific islander        0.0024        0.0037     0.0608
#> raceWhite                                      0.8836        0.8735     0.3324
#> age                                           69.1024       69.7232    11.3670
#> diabetesTRUE                                   0.1191        0.1254     0.3313
#> smokeTRUE                                      0.3816        0.3726     0.4836
#> dyspneaTRUE                                    0.3065        0.3042     0.4601
#> fnstatus2Partially dependent                   0.0162        0.0135     0.1155
#> fnstatus2Totally dependent                     0.0008        0.0005     0.0230
#> ascitesTRUE                                    0.0034        0.0024     0.0488
#> hxchfTRUE                                      0.0278        0.0233     0.1510
#> hypermedTRUE                                   0.6346        0.6484     0.4775
#> renafailTRUE                                   0.0021        0.0013     0.0364
#> dialysisTRUE                                   0.0143        0.0133     0.1144
#> steroidTRUE                                    0.0774        0.0655     0.2474
#> bleeddisTRUE                                   0.0546        0.0607     0.2389
#> wtlossTRUE                                     0.0093        0.0069     0.0828
#> lapTRUE                                        0.2262        0.2190     0.4137
#>                                         Mean Diff eQQ Med eQQ Mean eQQ Max
#> distance                                   0.0006       0   0.0007  0.0652
#> sexFALSE                                   0.0069       0   0.0069  1.0000
#> sexTRUE                                   -0.0069       0   0.0069  1.0000
#> raceAsian                                 -0.0021       0   0.0021  1.0000
#> raceBlack                                 -0.0064       0   0.0064  1.0000
#> raceNative Hawaiian or Pacific islander   -0.0013       0   0.0013  1.0000
#> raceWhite                                  0.0101       0   0.0101  1.0000
#> age                                       -0.6208       1   0.6500  3.0000
#> diabetesTRUE                              -0.0064       0   0.0064  1.0000
#> smokeTRUE                                  0.0090       0   0.0090  1.0000
#> dyspneaTRUE                                0.0024       0   0.0024  1.0000
#> fnstatus2Partially dependent               0.0027       0   0.0027  1.0000
#> fnstatus2Totally dependent                 0.0003       0   0.0003  1.0000
#> ascitesTRUE                                0.0011       0   0.0011  1.0000
#> hxchfTRUE                                  0.0045       0   0.0045  1.0000
#> hypermedTRUE                              -0.0138       0   0.0138  1.0000
#> renafailTRUE                               0.0008       0   0.0008  1.0000
#> dialysisTRUE                               0.0011       0   0.0011  1.0000
#> steroidTRUE                                0.0119       0   0.0119  1.0000
#> bleeddisTRUE                              -0.0061       0   0.0061  1.0000
#> wtlossTRUE                                 0.0024       0   0.0024  1.0000
#> lapTRUE                                    0.0072       0   0.0072  1.0000
#> 
#> Percent Balance Improvement:
#>                                         Mean Diff.  eQQ Med eQQ Mean eQQ Max
#> distance                                   99.4751 100.0000  99.4432  86.989
#> sexFALSE                                   84.8342   0.0000  84.7953   0.000
#> sexTRUE                                    84.8342   0.0000  84.7953   0.000
#> raceAsian                                  81.0867   0.0000  81.3953   0.000
#> raceBlack                                  -1.0740   0.0000   0.0000   0.000
#> raceNative Hawaiian or Pacific islander   -94.0971   0.0000 -66.6667   0.000
#> raceWhite                                  48.4096   0.0000  48.6486   0.000
#> age                                        94.6694  90.9091  94.4193  88.000
#> diabetesTRUE                               85.7552   0.0000  85.7143   0.000
#> smokeTRUE                                  95.6037   0.0000  95.6016   0.000
#> dyspneaTRUE                                99.1576   0.0000  99.1573   0.000
#> fnstatus2Partially dependent               78.4583   0.0000  78.2609   0.000
#> fnstatus2Totally dependent                 43.2438   0.0000   0.0000   0.000
#> ascitesTRUE                                49.3217   0.0000  42.8571   0.000
#> hxchfTRUE                                  81.8754   0.0000  81.7204   0.000
#> hypermedTRUE                               94.7979   0.0000  94.7948   0.000
#> renafailTRUE                               51.1630   0.0000  50.0000   0.000
#> dialysisTRUE                               88.2156   0.0000  87.8788   0.000
#> steroidTRUE                                80.4980   0.0000  80.4348   0.000
#> bleeddisTRUE                               82.2672   0.0000  82.1705   0.000
#> wtlossTRUE                                 67.0991   0.0000  66.6667   0.000
#> lapTRUE                                    93.9816   0.0000  93.9866   0.000
#> 
#> Sample sizes:
#>           Control Treated
#> All        121834    3771
#> Matched      3771    3771
#> Unmatched  118063       0
#> Discarded       0       0

If necessary, I can produce a matched pair matrix with df_match$match.matrix for matched pair analysis.

Model fitting

First, I will fit my unadjusted model. I use ordinal logistic regression (provided by MASS:polr package) because I am regressing on to an ordinal outcome variable (dindo) with levels 0 through 5.

unadjfit <- MASS::polr(dindo ~ hxcopd, data = df_m)
unadjfit.or <- exp(cbind(coef(unadjfit), t(confint(unadjfit))))
#> Waiting for profiling to be done...
#> 
#> Re-fitting to get Hessian

The model is summarized:

summary(unadjfit)
#> 
#> Re-fitting to get Hessian
#> Call:
#> MASS::polr(formula = dindo ~ hxcopd, data = df_m)
#> 
#> Coefficients:
#>             Value Std. Error t value
#> hxcopdTRUE 0.4003     0.1323   3.027
#> 
#> Intercepts:
#>     Value   Std. Error t value
#> 0|1  3.6128  0.1018    35.4742
#> 1|2  3.7429  0.1045    35.8053
#> 2|3  4.2549  0.1181    36.0240
#> 3|4  4.9046  0.1443    33.9888
#> 4|5  6.0542  0.2276    26.6017
#> 
#> Residual Deviance: 2887.225 
#> AIC: 2899.225

polr outputs the ordinal logistic regression parameteized as:

\[logit (P(Y \le j)) = \beta_{j0} – \eta_{1}x_1 – \cdots – \eta_{p} x_p\]

Due to the parallel lines assumption, even though we have six categories, the coefficient of COPD (hxcopd) stays the same across the five categories. The two equations for hxcopd = 1 and hxcopd = 0 are

\[ \begin{eqnarray} logit (P(Y \le j | x_1=1) & = & \beta_{j0} – \eta_{1} \\ logit (P(Y \le j | x_1=0) & = & \beta_{j0} \end{eqnarray} \]

Therefore:

\[logit (P(Y \le j)|x_1=1) -logit (P(Y \le j)|x_1=0) = – \eta_{1}.\]

Normally, I could get the odds ratio by exponentiating both sides of this equation and using \(log(b)-log(a) = log(b/a)\):

\[\frac{P(Y \le j |x_1=1)}{P(Y>j|x_1=1)} / \frac{P(Y \le j |x_1=0)}{P(Y>j|x_1=0)} = exp( -\eta_{1}).\]

Which by the proportional odds assumption can be simplified:

\[\frac{P(Y \le j |x_1=1)}{P(Y>j|x_1=1)} = p_1 / (1-p_1) \]

\[\frac{P(Y \le j |x_1=0)}{P(Y>j|x_1=0)} = p_0 / (1-p_0)\]

The odds ratio is defined as:

\[\frac{p_1 / (1-p_1) }{p_0 / (1-p_0)} = exp( -\eta_{1}).\]

However, the coefficient reported by summary(unadjfit) is actually \(\eta\), not \(-\eta\). So I will have to flip the odds ratio:

Since \(exp(-\eta_{1}) = \frac{1}{exp(\eta_{1})}\),

\[exp(\eta_{1}) = \frac{p_0 / (1-p_0) }{p_1 / (1-p_1)}.\]

From the output, \(\hat{\eta}_1=0.4003126\), the odds ratio \(exp(\hat{\eta}_1)=1.4922911\) is actually \(\frac{p_0 / (1-p_0) }{p_1 / (1-p_1)}\).

This can be interpreted as “people without COPD have 1.4922911 higher odds of being in a category \(\leq J\) vs. \(>J\) when compared to patients with COPD” where \(J = \{0, 1,2,3,4,5\}\).

To make this more interpretable, you can switch around some of the signs:

\[ \begin{eqnarray} exp(-\eta_{1}) & = & \frac{p_1 / (1-p_1)}{p_0/(1-p_0)} \\ & = & \frac{p_1 (1-p_0)}{p_0(1-p_1)} \\ & = & \frac{(1-p_0)/p_0}{(1-p_1)/p_1} \\ & = & \frac{P (Y >j | x=0)/P(Y \le j|x=0)}{P(Y > j | x=1)/P(Y \le j | x=1)}. \end{eqnarray} \]

Since \(exp(-\eta_{1}) = \frac{1}{exp(\eta_{1})}\),

\[\frac{P (Y >j | x=1)/P(Y \le j|x=1)}{P(Y > j | x=0)/P(Y \le j | x=0)} = exp(\eta).\]

Instead of interpreting the odds of being in category \(\leq J\), we can interpret the odds of being in category \(>J\): “people with COPD have 1.4922911 times the odds of being in category \(>J\) compared to people without COPD.”

The 95% confidence interval can be easily calculated:

exp(confint(unadjfit))
#> Waiting for profiling to be done...
#> 
#> Re-fitting to get Hessian
#>    2.5 %   97.5 % 
#> 1.153489 1.938509

In order to fit the adjusted model, I start by suppying all the variables include in the match:

adjfit <- MASS::polr(dindo ~ hxcopd + sex + race + age + diabetes + smoke + dyspnea + fnstatus2 + ascites + hxchf + hypermed + renafail + dialysis + steroid + bleeddis + wtloss + lap, data = df_m)
#> Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

summary(adjfit)
#> 
#> Re-fitting to get Hessian
#> Call:
#> MASS::polr(formula = dindo ~ hxcopd + sex + race + age + diabetes + 
#>     smoke + dyspnea + fnstatus2 + ascites + hxchf + hypermed + 
#>     renafail + dialysis + steroid + bleeddis + wtloss + lap, 
#>     data = df_m)
#> 
#> Coefficients:
#>                                             Value Std. Error    t value
#> hxcopdTRUE                                0.42863  1.342e-01  3.193e+00
#> sexTRUE                                  -0.06410  1.940e-01 -3.303e-01
#> raceAsian                                -1.00369  1.245e+00 -8.063e-01
#> raceBlack                                 0.22738  1.036e+00  2.195e-01
#> raceNative Hawaiian or Pacific islander -10.96370  6.278e-05 -1.746e+05
#> raceWhite                                -0.10687  1.021e+00 -1.046e-01
#> age                                       0.02040  6.960e-03  2.931e+00
#> diabetesTRUE                              0.57178  1.670e-01  3.424e+00
#> smokeTRUE                                 0.16700  1.515e-01  1.103e+00
#> dyspneaTRUE                               0.17248  1.399e-01  1.233e+00
#> fnstatus2Partially dependent              0.78438  3.369e-01  2.328e+00
#> fnstatus2Totally dependent              -14.00459  8.157e-09 -1.717e+09
#> ascitesTRUE                               1.82496  5.805e-01  3.144e+00
#> hxchfTRUE                                 0.99521  2.560e-01  3.888e+00
#> hypermedTRUE                              0.04481  1.508e-01  2.972e-01
#> renafailTRUE                              0.35707  1.070e+00  3.338e-01
#> dialysisTRUE                              0.98040  3.492e-01  2.807e+00
#> steroidTRUE                               0.07941  2.370e-01  3.350e-01
#> bleeddisTRUE                              0.80356  2.006e-01  4.007e+00
#> wtlossTRUE                                0.97155  4.148e-01  2.342e+00
#> lapTRUE                                   0.24056  1.564e-01  1.538e+00
#> 
#> Intercepts:
#>     Value         Std. Error    t value      
#> 0|1  5.421500e+00  1.156200e+00  4.689200e+00
#> 1|2  5.554000e+00  1.156400e+00  4.802700e+00
#> 2|3  6.074700e+00  1.157800e+00  5.246600e+00
#> 3|4  6.732100e+00  1.160900e+00  5.799100e+00
#> 4|5  7.888900e+00  1.174300e+00  6.718100e+00
#> 
#> Residual Deviance: 2786.892 
#> AIC: 2838.892

However, note the warning. This is because the level of fnstatus2 called Totally dependent has only 5 records. Each of these records ends up with the same Dindo-Clavien classification of 0. This leads to complete separation of the variable fnstatus2Totally dependent with regards to the outcome variable dindo. The maximum likelihood estimate for fnstatus2Totally dependent does not exist. Another hint is the large coefficient of -14.0045892 and the even larger standard error.

We can resolve this complete separation by collapsing fnstatus2 into two levels (“Independent” and “Dependent”):

df_m$fnstatus2 <- factor(df_m$fnstatus2)
levels(df_m$fnstatus2) <- list(Independent = "Independent", "Dependent" = c("Partially dependent", "Totally dependent"))

adjfit <- MASS::polr(dindo ~ hxcopd + sex + race + age + diabetes + smoke + dyspnea + fnstatus2 + ascites + hxchf + hypermed + renafail + dialysis + steroid + bleeddis + wtloss + lap, data = df_m)

summary(adjfit)
#> 
#> Re-fitting to get Hessian
#> Call:
#> MASS::polr(formula = dindo ~ hxcopd + sex + race + age + diabetes + 
#>     smoke + dyspnea + fnstatus2 + ascites + hxchf + hypermed + 
#>     renafail + dialysis + steroid + bleeddis + wtloss + lap, 
#>     data = df_m)
#> 
#> Coefficients:
#>                                             Value Std. Error    t value
#> hxcopdTRUE                                0.42908  1.342e-01  3.197e+00
#> sexTRUE                                  -0.06567  1.940e-01 -3.385e-01
#> raceAsian                                -1.01655  1.245e+00 -8.166e-01
#> raceBlack                                 0.22811  1.036e+00  2.202e-01
#> raceNative Hawaiian or Pacific islander -10.71234  8.076e-05 -1.326e+05
#> raceWhite                                -0.10699  1.021e+00 -1.048e-01
#> age                                       0.02040  6.956e-03  2.932e+00
#> diabetesTRUE                              0.57169  1.670e-01  3.424e+00
#> smokeTRUE                                 0.16755  1.515e-01  1.106e+00
#> dyspneaTRUE                               0.17326  1.398e-01  1.239e+00
#> fnstatus2Dependent                        0.75604  3.359e-01  2.251e+00
#> ascitesTRUE                               1.82831  5.803e-01  3.150e+00
#> hxchfTRUE                                 0.99739  2.559e-01  3.897e+00
#> hypermedTRUE                              0.04599  1.508e-01  3.050e-01
#> renafailTRUE                              0.35625  1.070e+00  3.330e-01
#> dialysisTRUE                              0.98028  3.492e-01  2.807e+00
#> steroidTRUE                               0.08093  2.370e-01  3.415e-01
#> bleeddisTRUE                              0.80512  2.005e-01  4.015e+00
#> wtlossTRUE                                0.97516  4.147e-01  2.352e+00
#> lapTRUE                                   0.24020  1.564e-01  1.536e+00
#> 
#> Intercepts:
#>     Value        Std. Error   t value     
#> 0|1       5.4216       1.1561       4.6896
#> 1|2       5.5541       1.1564       4.8031
#> 2|3       6.0748       1.1578       5.2470
#> 3|4       6.7322       1.1608       5.7995
#> 4|5       7.8889       1.1742       6.7186
#> 
#> Residual Deviance: 2787.425 
#> AIC: 2837.425

In order to produce the best fit, I utilize both backwards and forwards step-wise model selection by AIC:

adjfit.step <- MASS::stepAIC(adjfit, direction = "both", trace = FALSE)

summary(adjfit.step)
#> 
#> Re-fitting to get Hessian
#> Call:
#> MASS::polr(formula = dindo ~ hxcopd + age + diabetes + fnstatus2 + 
#>     ascites + hxchf + dialysis + bleeddis + wtloss, data = df_m)
#> 
#> Coefficients:
#>                      Value Std. Error t value
#> hxcopdTRUE         0.42097   0.133888   3.144
#> age                0.01624   0.006175   2.631
#> diabetesTRUE       0.57029   0.164486   3.467
#> fnstatus2Dependent 0.77247   0.336009   2.299
#> ascitesTRUE        1.73700   0.580973   2.990
#> hxchfTRUE          1.04844   0.250773   4.181
#> dialysisTRUE       1.00015   0.342407   2.921
#> bleeddisTRUE       0.81298   0.198346   4.099
#> wtlossTRUE         0.97523   0.415840   2.345
#> 
#> Intercepts:
#>     Value   Std. Error t value
#> 0|1  5.0570  0.4572    11.0611
#> 1|2  5.1893  0.4578    11.3346
#> 2|3  5.7091  0.4612    12.3777
#> 3|4  6.3653  0.4687    13.5808
#> 4|5  7.5204  0.5007    15.0188
#> 
#> Residual Deviance: 2799.438 
#> AIC: 2827.438

Exponentiating the coefficients to obtain odds ratios:

broom::tidy(adjfit.step, conf.int = TRUE, p.values = TRUE, exponentiate = TRUE)
#> 
#> Re-fitting to get Hessian
#> # A tibble: 14 x 8
#>    term       estimate std.error statistic conf.low conf.high  p.value coef.type
#>    <chr>         <dbl>     <dbl>     <dbl>    <dbl>     <dbl>    <dbl> <chr>    
#>  1 hxcopdTRUE     1.52   0.134        3.14     1.17      1.99  1.51e-3 coeffici~
#>  2 age            1.02   0.00618      2.63     1.00      1.03  7.67e-3 coeffici~
#>  3 diabetesT~     1.77   0.164        3.47     1.27      2.42  9.76e-4 coeffici~
#>  4 fnstatus2~     2.17   0.336        2.30     1.06      4.01  3.49e-2 coeffici~
#>  5 ascitesTR~     5.68   0.581        2.99     1.57     16.1   1.14e-2 coeffici~
#>  6 hxchfTRUE      2.85   0.251        4.18     1.70      4.56  1.72e-4 coeffici~
#>  7 dialysisT~     2.72   0.342        2.92     1.32      5.09  8.66e-3 coeffici~
#>  8 bleeddisT~     2.25   0.198        4.10     1.50      3.28  1.60e-4 coeffici~
#>  9 wtlossTRUE     2.65   0.416        2.35     1.09      5.66  3.29e-2 coeffici~
#> 10 0|1          157.     0.457       11.1     NA        NA    NA       scale    
#> 11 1|2          179.     0.458       11.3     NA        NA    NA       scale    
#> 12 2|3          302.     0.461       12.4     NA        NA    NA       scale    
#> 13 3|4          581.     0.469       13.6     NA        NA    NA       scale    
#> 14 4|5         1845.     0.501       15.0     NA        NA    NA       scale

The adjusted odds ratio of 1.5234386 is very similar to the unadjusted odds ratio of 1.4922911. This is because we had very good matching which essentially negates the effect of our covariates. Nonetheless, the logistic regression is more robust and has greater power than a chi-square test. Not to mention a chi-square test does not tell us the size of the effect.

I can test the parallel regression assumption with a Brant test:

brant::brant(adjfit.step)
#> ---------------------------------------------------- 
#> Test for     X2  df  probability 
#> ---------------------------------------------------- 
#> Omnibus          -756.67 36  1
#> hxcopdTRUE       0.44    4   0.98
#> age          6.25    4   0.18
#> diabetesTRUE     1.7 4   0.79
#> fnstatus2Dependent   15.68   4   0
#> ascitesTRUE      127.76  4   0
#> hxchfTRUE        1.52    4   0.82
#> dialysisTRUE     19.19   4   0
#> bleeddisTRUE     2.21    4   0.7
#> wtlossTRUE       40.17   4   0
#> ---------------------------------------------------- 
#> 
#> H0: Parallel Regression Assumption holds
#> Warning in brant::brant(adjfit.step): 1402 combinations in table(dv,ivs) do not
#> occur. Because of that, the test results might be invalid.

The omnibus test is non-significant but there are some significant covariates. I’m still trying to figure out how to interpret whether or not this means the assumption holds.

In addition, the Hosmer and Lemeshow test is non-significant, which suggests the assumption holds:

generalhoslem::logitgof(df_m$dindo, fitted(adjfit.step), ord = TRUE)
#> Warning in generalhoslem::logitgof(df_m$dindo, fitted(adjfit.step), ord =
#> TRUE): At least one cell in the expected frequencies table is < 1. Chi-square
#> approximation may be incorrect.
#> 
#>  Hosmer and Lemeshow test (ordinal model)
#> 
#> data:  df_m$dindo, fitted(adjfit.step)
#> X-squared = 32.485, df = 44, p-value = 0.9001

Alternatively, many will recommend against the use of step-wise model selection due to its potential to produce significant bias. Instead, they advocate for covariate selection based on clinical expertise and insight. For that reason, I would build a model with covariates that I really thought were strongly related to outcome. I would argue sex, race, dyspnea, weight loss, and steroid use are probably not strongly related with the outcomes we are examining. I realize you could make the argument that any of these are in some way, but I’m trying to pick out the ones I think are strongly related and also trying to minimize the number of covariates in our model.

adjfit <- MASS::polr(dindo ~ hxcopd + age + diabetes + smoke + fnstatus2 + ascites + hxchf + renafail + dialysis + steroid + bleeddis + lap, data = df_m)
broom::tidy(adjfit, conf.int = TRUE, p.values = TRUE, exponentiate = TRUE)
#> 
#> Re-fitting to get Hessian
#> # A tibble: 17 x 8
#>    term      estimate std.error statistic conf.low conf.high   p.value coef.type
#>    <chr>        <dbl>     <dbl>     <dbl>    <dbl>     <dbl>     <dbl> <chr>    
#>  1 hxcopdTR~     1.51   0.134       3.10    1.17        1.97   1.79e-3 coeffici~
#>  2 age           1.02   0.00683     3.06    1.01        1.04   1.89e-3 coeffici~
#>  3 diabetes~     1.77   0.165       3.48    1.27        2.43   9.40e-4 coeffici~
#>  4 smokeTRUE     1.21   0.151       1.25    0.896       1.62   2.14e-1 coeffici~
#>  5 fnstatus~     2.29   0.335       2.48    1.13        4.23   2.39e-2 coeffici~
#>  6 ascitesT~     6.65   0.571       3.32    1.87       18.5    6.05e-3 coeffici~
#>  7 hxchfTRUE     3.03   0.248       4.47    1.82        4.82   6.62e-5 coeffici~
#>  8 renafail~     1.52   1.06        0.393   0.0814      8.23   7.10e-1 coeffici~
#>  9 dialysis~     3.05   0.340       3.28    1.49        5.70   3.57e-3 coeffici~
#> 10 steroidT~     1.11   0.236       0.428   0.677       1.72   6.73e-1 coeffici~
#> 11 bleeddis~     2.30   0.198       4.19    1.53        3.34   1.16e-4 coeffici~
#> 12 lapTRUE       1.23   0.156       1.33    0.900       1.66   1.91e-1 coeffici~
#> 13 0|1         245.     0.534      10.3    NA          NA     NA       scale    
#> 14 1|2         280.     0.534      10.5    NA          NA     NA       scale    
#> 15 2|3         470.     0.537      11.4    NA          NA     NA       scale    
#> 16 3|4         907.     0.544      12.5    NA          NA     NA       scale    
#> 17 4|5        2883.     0.572      13.9    NA          NA     NA       scale

lap, which is a variable that indicates if a patient underwent laparoscopic or open repair, is not significantly associated with any difference in the primary endpoint.

What else do we need here?