# Packages
pacman::p_load(tidyverse, VGAM)
# Reverse logit function: expit
expit <- function(x){exp(x)/(1+exp(x))}
# Stem cell data
stemcell <- data.frame(
religion = factor(rep(3:1,each = 2),labels = c("N","M","V")),
gender = factor(rep(0:1,3), labels=c("M","F")),
def_fund = c(21,34,30,41,64,58),
prob_fund = c(52,67,52,83,50,63),
prob_not = c(24,30,18,23,16,15),
def_not = c(15,25,11,14,11,12)
)
The data are already summarized, with the number of responses for religious (X = Very, Moderate, Not), gender (Z), and stem cell funding opinion (Y)
stemcell
## religion gender def_fund prob_fund prob_not def_not
## 1 V M 21 52 24 15
## 2 V F 34 67 30 25
## 3 M M 30 52 18 11
## 4 M F 41 83 23 14
## 5 N M 64 50 16 11
## 6 N F 58 63 15 12
The function we want to use to fit the adjacent odds logistic
regression model is called vglm() from the
VGAM package, which is a very flexible function in that it
can fit many different models for multinomial data.
In addition to formula and data, we need to
specify:
family = acat(): adjacent categories
parallel = T: parallel lines (same slope)
stem_adj <-
vglm(
formula = cbind(def_fund, prob_fund, prob_not, def_not) ~ religion + gender,
family = acat(parallel = T),
data = stemcell
)
summary(stem_adj)
##
## Call:
## vglm(formula = cbind(def_fund, prob_fund, prob_not, def_not) ~
## religion + gender, family = acat(parallel = T), data = stemcell)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 0.14169 0.10303 1.375 0.16908
## (Intercept):2 -1.36725 0.12752 -10.722 < 2e-16 ***
## (Intercept):3 -0.70286 0.16370 -4.294 1.76e-05 ***
## religionM 0.29340 0.09699 3.025 0.00249 **
## religionV 0.53583 0.09625 5.567 2.59e-08 ***
## genderF 0.01309 0.07677 0.170 0.86463
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: loglink(P[Y=2]/P[Y=1]), loglink(P[Y=3]/P[Y=2]),
## loglink(P[Y=4]/P[Y=3])
##
## Residual deviance: 11.8976 on 12 degrees of freedom
##
## Log-likelihood: -48.0273 on 12 degrees of freedom
##
## Number of Fisher scoring iterations: 4
##
## No Hauck-Donner effect found in any of the estimates
##
##
## Exponentiated coefficients:
## religionM religionV genderF
## 1.340978 1.708858 1.013175
Unfortunately, the broom package functions aren’t
compatible with the object created by vglm(), so we use
plain ol summary() to display the output.
IMPORTANT: These estimates model the odds of being more likely to oppose stem cell research!
That’s because, in the formula, we specified def_fund to
def_not in that order, and level 1 is the group you specify
first!
Alternatively, you can use the data in a ‘wide’ format using
pivot_longer(), but make sure to specify the order of the
levels of the factor!
vglm(
formula = stemcell_support ~ religion + gender,
family = acat(parallel = T),
data = stemcell |>
# Changing from wide to long format
pivot_longer(
cols = def_fund:def_not,
names_to = 'stemcell_support',
values_to = 'count'
) |>
# Ordering the levels of stemcell_support
mutate(
stemcell_support = factor(stemcell_support,
levels = c('def_fund', 'prob_fund', 'prob_not', 'def_not'),
ordered = T)
),
weights = count
) |>
summary()
##
## Call:
## vglm(formula = stemcell_support ~ religion + gender, family = acat(parallel = T),
## data = mutate(pivot_longer(stemcell, cols = def_fund:def_not,
## names_to = "stemcell_support", values_to = "count"),
## stemcell_support = factor(stemcell_support, levels = c("def_fund",
## "prob_fund", "prob_not", "def_not"), ordered = T)),
## weights = count)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 0.14169 0.10303 1.375 0.16907
## (Intercept):2 -1.36725 0.12752 -10.722 < 2e-16 ***
## (Intercept):3 -0.70286 0.16369 -4.294 1.76e-05 ***
## religionM 0.29340 0.09699 3.025 0.00249 **
## religionV 0.53583 0.09624 5.568 2.58e-08 ***
## genderF 0.01309 0.07677 0.170 0.86463
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: loglink(P[Y=2]/P[Y=1]), loglink(P[Y=3]/P[Y=2]),
## loglink(P[Y=4]/P[Y=3])
##
## Residual deviance: 2033.109 on 66 degrees of freedom
##
## Log-likelihood: -1016.554 on 66 degrees of freedom
##
## Number of Fisher scoring iterations: 4
##
## No Hauck-Donner effect found in any of the estimates
##
##
## Exponentiated coefficients:
## religionM religionV genderF
## 1.340978 1.708858 1.013175
def_not fund to
def_fund1 = def not, 4 = def fund
Here, our model predicts the increase chance of supporting stem cell
by including rev = T as it reverses the order of the
fractions:
rev = F: \(\log(\pi_2 /
\pi_1)\)
rev = T: \(\log(\pi_1 /
\pi_2)\)
stem_rev <-
vglm(
formula = cbind(def_fund,prob_fund,prob_not,def_not) ~ religion + gender,
family = acat(parallel = T, rev = T),
data = stemcell
)
summary(stem_rev)
##
## Call:
## vglm(formula = cbind(def_fund, prob_fund, prob_not, def_not) ~
## religion + gender, family = acat(parallel = T, rev = T),
## data = stemcell)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -0.14169 0.10303 -1.375 0.16908
## (Intercept):2 1.36725 0.12752 10.722 < 2e-16 ***
## (Intercept):3 0.70286 0.16370 4.294 1.76e-05 ***
## religionM -0.29340 0.09699 -3.025 0.00249 **
## religionV -0.53583 0.09625 -5.567 2.59e-08 ***
## genderF -0.01309 0.07677 -0.170 0.86463
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: loglink(P[Y=1]/P[Y=2]), loglink(P[Y=2]/P[Y=3]),
## loglink(P[Y=3]/P[Y=4])
##
## Residual deviance: 11.8976 on 12 degrees of freedom
##
## Log-likelihood: -48.0273 on 12 degrees of freedom
##
## Number of Fisher scoring iterations: 4
##
## No Hauck-Donner effect found in any of the estimates
##
##
## Exponentiated coefficients:
## religionM religionV genderF
## 0.7457246 0.5851862 0.9869967
Do we need parallel lines?
parallel = F
stem_np <-
vglm(
formula = cbind(def_fund, prob_fund, prob_not, def_not) ~ religion + gender,
family = acat(parallel = F, rev = T),
data = stemcell
)
summary(stem_np)
##
## Call:
## vglm(formula = cbind(def_fund, prob_fund, prob_not, def_not) ~
## religion + gender, family = acat(parallel = F, rev = T),
## data = stemcell)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 0.14149 0.15669 0.903 0.366540
## (Intercept):2 1.20121 0.22773 5.275 1.33e-07 ***
## (Intercept):3 0.38368 0.31036 1.236 0.216373
## religionM:1 -0.70955 0.19676 -3.606 0.000311 ***
## religionM:2 -0.11568 0.27061 -0.427 0.669029
## religionM:3 0.20917 0.37503 0.558 0.577019
## religionV:1 -0.83989 0.20922 -4.014 5.96e-05 ***
## religionV:2 -0.51558 0.26136 -1.973 0.048531 *
## religionV:3 0.01304 0.34593 0.038 0.969931
## genderF:1 -0.12598 0.16817 -0.749 0.453799
## genderF:2 0.18153 0.20877 0.870 0.384561
## genderF:3 -0.16828 0.28096 -0.599 0.549214
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: loglink(P[Y=1]/P[Y=2]), loglink(P[Y=2]/P[Y=3]),
## loglink(P[Y=3]/P[Y=4])
##
## Residual deviance: 2.1803 on 6 degrees of freedom
##
## Log-likelihood: -43.1686 on 6 degrees of freedom
##
## Number of Fisher scoring iterations: 3
##
## No Hauck-Donner effect found in any of the estimates
##
##
## Exponentiated coefficients:
## religionM:1 religionM:2 religionM:3 religionV:1 religionV:2 religionV:3
## 0.4918660 0.8907611 1.2326581 0.4317571 0.5971534 1.0131252
## genderF:1 genderF:2 genderF:3
## 0.8816334 1.1990510 0.8451165
You can specify which predictors you want to be parallel vs not
parallel with parallel = F ~ x will create non-parallel
lines for the variables after ~
coef(
vglm(
cbind(def_fund, prob_fund, prob_not, def_not) ~ religion + gender,
family = acat(parallel = F ~ gender, rev = T),
data = stemcell
)
)
## (Intercept):1 (Intercept):2 (Intercept):3 religionM religionV
## -0.06454349 1.25302630 0.77390303 -0.29288898 -0.53550310
## genderF:1 genderF:2 genderF:3
## -0.15148128 0.18993669 -0.14151712
Getting the test statistic: Diff in Deviances –>
deviance()
Test stat:
(LRT_G <- deviance(stem_rev) - deviance(stem_np))
## [1] 9.71728
Degrees of freedom: \((J-2) \times k = (4-2)*3 = 6\)
Can get the degrees of freedom of the deviances using
df.residual() function
(df_par <- df.residual(stem_rev) - df.residual(stem_np))
## [1] 6
#p-value:
(p_val_par <- pchisq(LRT_G, df = df_par, lower.tail = F))
## [1] 0.137074
So we don’t have evidence that non-parallel lines are needed!
fitted() vs
predict()fitted(model) will return the estimated probabilities
for the data used to build the model.
predict(model, newdata =..., type = 'response') will let
calculate the estimated probabilities for new data sets if
newdata = is provided and the model data otherwise
fitted(stem_rev)
## def_fund prob_fund prob_not def_not
## 1 0.2196942 0.4325715 0.1883542 0.15938016
## 2 0.2160054 0.4309116 0.1901034 0.16297959
## 3 0.2920903 0.4513071 0.1542074 0.10239524
## 4 0.2880188 0.4508791 0.1560908 0.10501137
## 5 0.3859238 0.4446671 0.1133043 0.05610474
## 6 0.3816832 0.4455750 0.1150314 0.05771038
# Adding the probabilities
bind_cols(
stemcell,
data.frame(predict(stem_rev, type = "response"))
)
## New names:
## • `def_fund` -> `def_fund...3`
## • `prob_fund` -> `prob_fund...4`
## • `prob_not` -> `prob_not...5`
## • `def_not` -> `def_not...6`
## • `def_fund` -> `def_fund...7`
## • `prob_fund` -> `prob_fund...8`
## • `prob_not` -> `prob_not...9`
## • `def_not` -> `def_not...10`
## religion gender def_fund...3 prob_fund...4 prob_not...5 def_not...6
## 1 V M 21 52 24 15
## 2 V F 34 67 30 25
## 3 M M 30 52 18 11
## 4 M F 41 83 23 14
## 5 N M 64 50 16 11
## 6 N F 58 63 15 12
## def_fund...7 prob_fund...8 prob_not...9 def_not...10
## 1 0.2196942 0.4325715 0.1883542 0.15938016
## 2 0.2160054 0.4309116 0.1901034 0.16297959
## 3 0.2920903 0.4513071 0.1542074 0.10239524
## 4 0.2880188 0.4508791 0.1560908 0.10501137
## 5 0.3859238 0.4446671 0.1133043 0.05610474
## 6 0.3816832 0.4455750 0.1150314 0.05771038
We were treating religion as nominal when it is ordinal. If a
predictor is ordinal, we can assign each level a score and treat it as
numeric (linear in the scores). We default to
scores = 1:I
# Setting religion scores as not = 1, very = 3
stemcell$religion_score = as.numeric(stemcell$religion)
# Fitting the adjacent cell model
# 1=def not fund, 4=def fund
# here our model predicts the increase chance of supporting stem cell
# Treating religion as linear
stem_linear <-
vglm(
cbind(def_fund, prob_fund, prob_not, def_not) ~ religion_score + gender,
family = acat(reverse = T, parallel = T),
data=stemcell
)
## Warning in vglm.fitter(x = x, y = y, w = w, offset = offset, Xm2 = Xm2, : some
## quantities such as z, residuals, SEs may be inaccurate due to convergence at a
## half-step
summary(stem_linear)
##
## Call:
## vglm(formula = cbind(def_fund, prob_fund, prob_not, def_not) ~
## religion_score + gender, family = acat(reverse = T, parallel = T),
## data = stemcell)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 0.11635 0.12453 0.934 0.350
## (Intercept):2 1.62458 0.15021 10.816 < 2e-16 ***
## (Intercept):3 0.96069 0.18631 5.157 2.52e-07 ***
## religion_score -0.26681 0.04787 -5.574 2.49e-08 ***
## genderF -0.01412 0.07671 -0.184 0.854
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: loglink(P[Y=1]/P[Y=2]), loglink(P[Y=2]/P[Y=3]),
## loglink(P[Y=3]/P[Y=4])
##
## Residual deviance: 11.9972 on 13 degrees of freedom
##
## Log-likelihood: -48.0771 on 13 degrees of freedom
##
## Number of Fisher scoring iterations: 4
##
## No Hauck-Donner effect found in any of the estimates
##
##
## Exponentiated coefficients:
## religion_score genderF
## 0.7658174 0.9859792
Can we treat religiousness linearly (\(H_0\))? Or does it need to be a factor (\(H_a\))?
(G_linear <- deviance(stem_linear) - deviance(stem_adj))
## [1] 0.09960573
pchisq(G_linear,df = 1,lower.tail = F)
## [1] 0.7523033
Seems like we can treat religiousness as linear And the term for gender is not statistically significant!