## Settings for RMarkdown http://yihui.name/knitr/options#chunk_options
opts_chunk$set(comment = "", warning = FALSE, message = FALSE, tidy = FALSE,
echo = T, fig.width = 7, fig.height = 7)
options(width = 125, scipen = 5, digits = 5)
setwd("~/statistics/self.logistic/")
Ordinal variables have a natural ordering among the levels. An example used here is histological grade of cancer.
Comparisons are carried out at each possible cutoff (indicated by || below) along the ordinal scale (two for 3-level ordinal outcome variable). It is assumed that in each of the comparison below, the odds ratio assessing the effect of an exposure variable will be the same.
Comparison 1
“well differentiated” || “moderately differentiated” “poorly differentiated”
Comparison 2
“well differentiated” “moderately differentiated” || “poorly differentiated”
Order the outcome variable.
library(foreign)
cancer <- read.dta("http://www.sph.emory.edu/~dkleinb/datasets/cancer.dta")
cancer <- within(cancer, {
grade <- factor(grade, 0:2, c("well differentiated","moderately differentiated","poorly differentiated"),
ordered = TRUE)
race <- factor(race, 0:1, c("white","black"))
estrogen <- factor(estrogen, 0:1, c("never used","ever used"))
subtype <- factor(subtype, 0:2, c("Adenocarcinoma","Adenosquamous","Other"))
age <- factor(age, 0:1, c("50-64","65-79"))
smoking <- factor(smoking, 0:1, c("non-smoker","current smoker"))
})
The odds ratio obtained by collapsing the first and second column is close to the one obtained by collapsing the second and third columns. Therefore, proportional odss assumption is met.
xtabs(~ race + grade, cancer)
grade
race well differentiated moderately differentiated poorly differentiated
white 104 72 31
black 26 33 22
library(vcd)
lapply(levels(cancer$grade)[-1],
function(threshold) {
tab.res <- xtabs(~ race + (grade >= threshold), cancer)[2:1,2:1]
list(table = tab.res, OR = exp(oddsratio(tab.res)))
})
[[1]]
[[1]]$table
grade >= threshold
race TRUE FALSE
black 55 26
white 103 104
[[1]]$OR
[1] 2.1359
[[2]]
[[2]]$table
grade >= threshold
race TRUE FALSE
black 22 59
white 31 176
[[2]]$OR
[1] 2.117
In the crude analysis, Black people have 2.13 times higher odds of having higher grade tumor than lower grade tumor.
Only one set of odds ratio will be produced (assumed same for each comparison), but intercepts will be calculated for each threshold for comparison. Alternatively, one can run two separate logistic regression for each comparison and get two sets of ORs and intercepts.
library(MASS)
polr.race <- polr(grade ~ race, cancer)
summary(polr.race)
Call:
polr(formula = grade ~ race, data = cancer)
Coefficients:
Value Std. Error t value
raceblack 0.755 0.247 3.06
Intercepts:
Value Std. Error t value
well differentiated|moderately differentiated 0.009 0.137 0.066
moderately differentiated|poorly differentiated 1.739 0.177 9.849
Residual Deviance: 588.67
AIC: 594.67
## Odds ratio and 95% CI
c(exp(coef(polr.race)), exp(confint(polr.race)))
raceblack 2.5 % 97.5 %
2.1287 1.3148 3.4611
## Likelihood ratio test
library(car)
Anova(polr.race, type = 3)
Analysis of Deviance Table (Type III tests)
Response: grade
LR Chisq Df Pr(>Chisq)
race 9.45 1 0.0021 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Plot predicted probabilities of each outcome
library(effects)
plot(effect("race", polr.race), style = "stacked")
Check proportional odds for estrogen variable
xtabs(~ estrogen + grade, cancer)
grade
estrogen well differentiated moderately differentiated poorly differentiated
never used 43 59 33
ever used 85 46 20
lapply(levels(cancer$grade)[-1],
function(threshold) {
tab.res <- xtabs(~ estrogen + (grade >= threshold), cancer)[2:1,2:1]
list(table = tab.res, OR = exp(oddsratio(tab.res)))
})
[[1]]
[[1]]$table
grade >= threshold
estrogen TRUE FALSE
ever used 66 85
never used 92 43
[[1]]$OR
[1] 0.36292
[[2]]
[[2]]$table
grade >= threshold
estrogen TRUE FALSE
ever used 20 131
never used 33 102
[[2]]$OR
[1] 0.47189
Fit polr
polr.race.estrogen <- polr(grade ~ race + estrogen, cancer)
summary(polr.race.estrogen)
Call:
polr(formula = grade ~ race + estrogen, data = cancer)
Coefficients:
Value Std. Error t value
raceblack 0.427 0.273 1.57
estrogenever used -0.776 0.250 -3.11
Intercepts:
Value Std. Error t value
well differentiated|moderately differentiated -0.511 0.213 -2.393
moderately differentiated|poorly differentiated 1.274 0.227 5.607
Residual Deviance: 575.21
AIC: 583.21
(2 observations deleted due to missingness)
## Odds ratio and 95% CI
cbind(OR = exp(coef(polr.race.estrogen)), exp(confint(polr.race.estrogen)))
OR 2.5 % 97.5 %
raceblack 1.53262 0.89761 2.61832
estrogenever used 0.46009 0.28117 0.74878
## Likelihood ratio test
Anova(polr.race.estrogen, type = 3)
Analysis of Deviance Table (Type III tests)
Response: grade
LR Chisq Df Pr(>Chisq)
race 2.45 1 0.1176
estrogen 9.78 1 0.0018 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Plot predicted probabilities of each outcome
plot(effect("race:estrogen", polr.race.estrogen), style = "stacked")
This function is consistent with SAS PROC LOGISTIC with DESCENDING option if used with reverse = TRUE.
## Load VGAM
library(VGAM)
## Without proportional odds assumptions
vglm.race.estrogen.nonparallel <-
vglm(grade ~ race + estrogen, cancer, family = cumulative(link = "logit", parallel = FALSE, reverse = TRUE))
summary(vglm.race.estrogen.nonparallel)
Call:
vglm(formula = grade ~ race + estrogen, family = cumulative(link = "logit",
parallel = FALSE, reverse = TRUE), data = cancer)
Pearson Residuals:
Min 1Q Median 3Q Max
logit(P[Y>=2]) -1.583 -0.833 0.319 0.904 1.38
logit(P[Y>=3]) -0.829 -0.636 -0.277 -0.232 2.60
Coefficients:
Estimate Std. Error z value
(Intercept):1 0.585 0.231 2.53
(Intercept):2 -1.391 0.278 -5.00
raceblack:1 0.376 0.307 1.22
raceblack:2 0.516 0.351 1.47
estrogenever used:1 -0.874 0.270 -3.24
estrogenever used:2 -0.552 0.345 -1.60
Number of linear predictors: 2
Names of linear predictors: logit(P[Y>=2]), logit(P[Y>=3])
Dispersion Parameter for cumulative family: 1
Residual deviance: 574.32 on 566 degrees of freedom
Log-likelihood: -287.16 on 566 degrees of freedom
Number of iterations: 4
## With proportional odds assumptions
vglm.race.estrogen.parallel <-
vglm(grade ~ race + estrogen, cancer, family = cumulative(link = "logit", parallel = TRUE, reverse = TRUE))
summary(vglm.race.estrogen.parallel)
Call:
vglm(formula = grade ~ race + estrogen, family = cumulative(link = "logit",
parallel = TRUE, reverse = TRUE), data = cancer)
Pearson Residuals:
Min 1Q Median 3Q Max
logit(P[Y>=2]) -1.563 -0.847 0.323 0.964 1.34
logit(P[Y>=3]) -0.847 -0.585 -0.292 -0.223 2.75
Coefficients:
Estimate Std. Error z value
(Intercept):1 0.511 0.215 2.38
(Intercept):2 -1.274 0.229 -5.57
raceblack 0.427 0.272 1.57
estrogenever used -0.776 0.249 -3.11
Number of linear predictors: 2
Names of linear predictors: logit(P[Y>=2]), logit(P[Y>=3])
Dispersion Parameter for cumulative family: 1
Residual deviance: 575.21 on 568 degrees of freedom
Log-likelihood: -287.61 on 568 degrees of freedom
Number of iterations: 4
## Difference in -2 log likelihood
(diff.neg.2logLik <- -2*(logLik(vglm.race.estrogen.parallel) - logLik(vglm.race.estrogen.nonparallel)))
[1] 0.89729
## Difference in DF
(diff.df <- df.residual(vglm.race.estrogen.parallel) - df.residual(vglm.race.estrogen.nonparallel))
[1] 2
## Likelihood ratio test
pchisq(q = diff.neg.2logLik, df = diff.df, lower.tail = FALSE)
[1] 0.63849
This function is consistent with SAS PROC LOGISTIC with DESCENDING option by default.
## Load rms
library(rms)
lrm.race.estrogen <- lrm(grade ~ race + estrogen, cancer)
lrm.race.estrogen
Logistic Regression Model
lrm(formula = grade ~ race + estrogen, data = cancer)
Frequencies of Missing Values Due to Each Variable
grade race estrogen
0 0 2
Model Likelihood Discrimination Rank Discrim.
Ratio Test Indexes Indexes
Obs 286 LR chi2 19.71 R2 0.076 C 0.619
well differentiated 128 d.f. 2 g 0.539 Dxy 0.238
moderately differentiated 105 Pr(> chi2) <0.0001 gr 1.714 gamma 0.349
poorly differentiated 53 gp 0.129 tau-a 0.151
max |deriv| 2e-12 Brier 0.231
Coef S.E. Wald Z Pr(>|Z|)
y>=moderately differentiated 0.5107 0.2134 2.39 0.0167
y>=poorly differentiated -1.2744 0.2273 -5.61 <0.0001
race=black 0.4270 0.2726 1.57 0.1173
estrogen=ever used -0.7763 0.2495 -3.11 0.0019