pacman::p_load(
car,
broom,
tidyverse,
ggfortify,
mosaic,
huxtable,
jtools,
latex2exp,
pubh,
sjlabelled,
sjPlot,
sjmisc
)
theme_set(sjPlot::theme_sjplot2(base_size = 10))
theme_update(legend.position = "top")
# options('huxtable.knit_print_df' = FALSE)
options('huxtable.knit_print_df_theme' = theme_article)
options('huxtable.autoformat_number_format' = list(numeric = "%5.2f"))
knitr::opts_chunk$set(collapse = TRUE, comment = NA)
Onchocerciasis in Sierra Leone.
data(Oncho)
Oncho %>% head()
id | mf | area | agegrp | sex | mfload | lesions |
---|---|---|---|---|---|---|
1 | Infected | Savannah | 20-39 | Female | 1 | No |
2 | Infected | Rainforest | 40+ | Male | 3 | No |
3 | Infected | Savannah | 40+ | Female | 1 | No |
4 | Not-infected | Rainforest | 20-39 | Female | 0 | No |
5 | Not-infected | Savannah | 40+ | Female | 0 | No |
6 | Not-infected | Rainforest | 20-39 | Female | 0 | No |
Oncho %>%
mutate(mf = relevel(mf, ref = "Infected")) %>%
copy_labels(Oncho) %>%
cross_tab(mf ~ area)
Infection | levels | Infected | Not-infected | Total |
---|---|---|---|---|
Total N (%) | 822 (63.1) | 480 (36.9) | 1302 | |
Residence | Savannah | 281 (34.2) | 267 (55.6) | 548 (42.1) |
Rainforest | 541 (65.8) | 213 (44.4) | 754 (57.9) |
Table with all descriptive statistics except id and mfload:
Oncho %>%
select(-c(id, mfload)) %>%
mutate(mf = relevel(mf, ref = "Infected")) %>%
copy_labels(Oncho) %>%
cross_tab(mf ~ area + .)
Infection | levels | Infected | Not-infected | Total |
---|---|---|---|---|
Total N (%) | 822 (63.1) | 480 (36.9) | 1302 | |
Residence | Savannah | 281 (34.2) | 267 (55.6) | 548 (42.1) |
Rainforest | 541 (65.8) | 213 (44.4) | 754 (57.9) | |
Age group (years) | 5-9 | 46 (5.6) | 156 (32.5) | 202 (15.5) |
10-19 | 99 (12.0) | 119 (24.8) | 218 (16.7) | |
20-39 | 299 (36.4) | 125 (26.0) | 424 (32.6) | |
40+ | 378 (46.0) | 80 (16.7) | 458 (35.2) | |
Sex | Male | 426 (51.8) | 190 (39.6) | 616 (47.3) |
Female | 396 (48.2) | 290 (60.4) | 686 (52.7) | |
Number of lesions | No | 640 (77.9) | 461 (96.0) | 1101 (84.6) |
Yes | 182 (22.1) | 19 (4.0) | 201 (15.4) |
Data set about blood counts of T cells from patients in remission from Hodgkin’s disease or in remission from disseminated malignancies. We generate the new variable Ratio and add labels to the variables.
data(Hodgkin)
Hodgkin <- Hodgkin %>%
mutate(Ratio = CD4/CD8) %>%
var_labels(
Ratio = "CD4+ / CD8+ T-cells ratio"
)
Hodgkin %>% head()
CD4 | CD8 | Group | Ratio |
---|---|---|---|
396 | 836 | Hodgkin | 0.47 |
568 | 978 | Hodgkin | 0.58 |
1212 | 1678 | Hodgkin | 0.72 |
171 | 212 | Hodgkin | 0.81 |
554 | 670 | Hodgkin | 0.83 |
1104 | 1335 | Hodgkin | 0.83 |
Descriptive statistics for CD4+ T cells:
Hodgkin %>%
estat(~ CD4)
N | Min. | Max. | Mean | Median | SD | CV | |
---|---|---|---|---|---|---|---|
CD4+ T-cells | 40.00 | 116.00 | 2415.00 | 672.62 | 528.50 | 470.49 | 0.70 |
Hodgkin %>%
estat(~ Ratio|Group)
Disease | N | Min. | Max. | Mean | Median | SD | CV | |
---|---|---|---|---|---|---|---|---|
CD4+ / CD8+ T-cells ratio | Non-Hodgkin | 20.00 | 1.10 | 3.49 | 2.12 | 2.15 | 0.73 | 0.34 |
Hodgkin | 20.00 | 0.47 | 3.82 | 1.50 | 1.19 | 0.91 | 0.61 |
descriptive statistics for all variables in the data set:
Hodgkin %>%
mutate(Group = relevel(Group, ref = "Hodgkin")) %>%
copy_labels(Hodgkin) %>%
cross_tab(Group ~ CD4 + .)
Disease | levels | Hodgkin | Non-Hodgkin | Total |
---|---|---|---|---|
Total N (%) | 20 (50.0) | 20 (50.0) | 40 | |
CD4+ T-cells | Mean (SD) | 823.2 (566.4) | 522.0 (293.0) | 672.6 (470.5) |
CD8+ T-cells | Mean (SD) | 613.9 (397.9) | 260.0 (154.7) | 436.9 (347.7) |
CD4+ / CD8+ T-cells ratio | Mean (SD) | 1.5 (0.9) | 2.1 (0.7) | 1.8 (0.9) |
# theme_article() %>%
# add_footnote("Values are medians with interquartile range.")
var.test(Ratio ~ Group, data = Hodgkin)
F test to compare two variances
data: Ratio by Group
F = 0.6294, num df = 19, denom df = 19, p-value = 0.3214
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
0.2491238 1.5901459
sample estimates:
ratio of variances
0.6293991
QQ-plots against the standard Normal distribution.
Hodgkin %>%
qq_plot(~ Ratio|Group) %>%
axis_labs()
non-parametric test to compare the groups:
wilcox.test(Ratio ~ Group, data = Hodgkin)
Wilcoxon rank sum test
data: Ratio by Group
W = 298, p-value = 0.007331
alternative hypothesis: true location shift is not equal to 0
For relatively small samples (for example, less than 30 observations per group) is a standard practice to show the actual data in dot plots with error bars. The pubh package offers two options to show graphically differences in continuous outcomes among groups:
For small samples: strip_error For medium to large samples: bar_error
Hodgkin %>%
strip_error(Ratio ~ Group) %>%
axis_labs()
In the previous code, axis_labs applies labels from labelled data to the axis.
The error bars represent 95% confidence intervals around mean values.
Is relatively easy to add a line on top, to show that the two groups are significantly different. The function gf_star needs the reference point on how to draw an horizontal line to display statistical difference or to annotate a plot; in summary, gf_star:
Hodgkin %>%
strip_error(Ratio ~ Group) %>%
axis_labs() %>%
gf_star(x1 = 1, y1 = 4, x2 = 2, y2 = 4.05, y3 = 4.1, "**")
For larger samples we could use bar charts with error bars. For example:
data(birthwt, package = "MASS")
birthwt <- birthwt %>%
mutate(
smoke = factor(smoke, labels = c("Non-smoker", "Smoker")),
Race = factor(race > 1, labels = c("White", "Non-white")),
race = factor(race, labels = c("White", "Afican American", "Other"))
) %>%
var_labels(
bwt = 'Birth weight (g)',
smoke = 'Smoking status',
race = 'Race',
)
birthwt %>%
bar_error(bwt ~ smoke) %>%
axis_labs()
No summary function supplied, defaulting to `mean_se()`
Quick normality check:
birthwt %>%
qq_plot(~ bwt|smoke) %>%
axis_labs()
The (unadjusted) t-test:
t.test(bwt ~ smoke, data = birthwt)
Welch Two Sample t-test
data: bwt by smoke
t = 2.7299, df = 170.1, p-value = 0.007003
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
78.57486 488.97860
sample estimates:
mean in group Non-smoker mean in group Smoker
3055.696 2771.919
Final plot with annotation to highlight statistical difference (unadjusted):
birthwt %>%
bar_error(bwt ~ smoke) %>%
axis_labs() %>%
gf_star(x1 = 1, x2 = 2, y1 = 3400, y2 = 3500, y3 = 3550, "**")
No summary function supplied, defaulting to `mean_se()`
strip_error and bar_error can generate plots stratified by a third variable, for example:
birthwt %>%
bar_error(bwt ~ smoke, fill = ~ Race) %>%
gf_refine(ggsci::scale_fill_jama()) %>%
axis_labs()
No summary function supplied, defaulting to `mean_se()`
birthwt %>%
bar_error(bwt ~ smoke|Race) %>%
axis_labs()
No summary function supplied, defaulting to `mean_se()`
No summary function supplied, defaulting to `mean_se()`
analysis of the linear model of smoking on birth weight, adjusting by race (and not by other potential confounders)
birthwt %>%
strip_error(bwt ~ smoke, pch = ~ Race, col = ~ Race) %>%
gf_refine(ggsci::scale_color_jama()) %>%
axis_labs()
analysis of the linear model of smoking on birth weight, adjusting by race (and not by other potential confounders)
model_bwt <- lm(bwt ~ smoke + race, data = birthwt)
model_bwt %>%
glm_coef(labels = model_labels(model_bwt))
Parameter | Coefficient | Pr(>|t|) |
---|---|---|
Constant | 3334.95 (3036.11, 3633.78) | < 0.001 |
Smoking status: Smoker | -428.73 (-667.02, -190.44) | < 0.001 |
Race: Afican American | -450.36 (-750.31, -150.4) | 0.003 |
Race: Other | -452.88 (-775.17, -130.58) | 0.006 |
Similar results can be obtained with the function tab_model from the sjPlot package.
tab_model(model_bwt, collapse.ci = TRUE)
 | Birth weight(g) | |
---|---|---|
Predictors | Estimates | p |
(Intercept) |
3334.95 (3153.89 – 3516.01) |
<0.001 |
Smoking status: Smoker |
-428.73 (-643.86 – -213.60) |
<0.001 |
Race: Afican American |
-450.36 (-752.45 – -148.27) |
0.004 |
Race: Other |
-452.88 (-682.67 – -223.08) |
<0.001 |
Observations | 189 | |
R2 / R2 adjusted | 0.123 / 0.109 |
multiple(model_bwt, ~ race)$df
contrast | estimate | SE | t.ratio | p.value | lower.CL | upper.CL |
---|---|---|---|---|---|---|
White - Afican American | 450.36 | 153.12 | 2.94 | 0.01 | 89.87 | 810.85 |
White - Other | 452.88 | 116.48 | 3.89 | < 0.001 | 178.65 | 727.10 |
Afican American - Other | 2.52 | 160.59 | 0.02 | 1 | -375.57 | 380.60 |
Some advantages of glm_coef over tab_model are:
Script documents can be knitted to Word and LaTEX (besides HTML). Uses robust standard errors by default. The option to not use robust standard errors is part of the arguments. Recognises some type of models that tab_model does not recognise. Some advantages of tab_model over glm_coef are:
Recognises labels from variables and use those labels to display the table. Includes some statistics about the model. It can display more than one model on the same output. Tables are more aesthetically appealing. In the previous table of coefficients confidence intervals and p-values for race had not been adjusted for multiple comparisons. We use functions from the emmeans package to make the corrections.
multiple(model_bwt, ~ race)$fig_ci %>%
gf_labs(x = "Difference in birth weights (g)")
multiple(model_bwt, ~ race)$fig_pval %>%
gf_labs(y = " ")
data(Bernard)
head(Bernard)
id | treat | race | fate | apache | o2del | followup | temp0 | temp10 |
---|---|---|---|---|---|---|---|---|
1.00 | Placebo | White | Dead | 27.00 | 539.20 | 50.00 | 35.22 | 36.56 |
2.00 | Ibuprofen | African American | Alive | 14.00 | 720.00 | 38.67 | 37.56 | |
3.00 | Placebo | African American | Dead | 33.00 | 551.12 | 33.00 | 38.33 | |
4.00 | Ibuprofen | White | Alive | 3.00 | 1376.14 | 720.00 | 38.33 | 36.44 |
5.00 | Placebo | White | Alive | 5.00 | 720.00 | 38.56 | 37.56 | |
6.00 | Ibuprofen | White | Alive | 13.00 | 1523.39 | 720.00 | 38.17 | 38.17 |
Bernard %>%
mutate(
fate = relevel(fate, ref = "Dead"),
treat = relevel(treat, ref = "Ibuprofen")
) %>%
copy_labels(Bernard) %>%
cross_tab(fate ~ treat)
Mortality status | levels | Dead | Alive | Total |
---|---|---|---|---|
Total N (%) | 176 (38.7) | 279 (61.3) | 455 | |
Treatment | Ibuprofen | 84 (47.7) | 140 (50.2) | 224 (49.2) |
Placebo | 92 (52.3) | 139 (49.8) | 231 (50.8) |
dat <- matrix(c(84, 140 , 92, 139),
nrow = 2, byrow = TRUE)
epiR::epi.2by2(as.table(dat))
Outcome + Outcome - Total Inc risk * Odds
Exposed + 84 140 224 37.5 0.600
Exposed - 92 139 231 39.8 0.662
Total 176 279 455 38.7 0.631
Point estimates and 95% CIs:
-------------------------------------------------------------------
Inc risk ratio 0.94 (0.75, 1.19)
Odds ratio 0.91 (0.62, 1.32)
Attrib risk * -2.33 (-11.27, 6.62)
Attrib risk in population * -1.15 (-8.88, 6.59)
Attrib fraction in exposed (%) -6.20 (-33.90, 15.76)
Attrib fraction in population (%) -2.96 (-15.01, 7.82)
-------------------------------------------------------------------
Test that OR = 1: chi2(1) = 0.260 Pr>chi2 = 0.61
Wald confidence limits
CI: confidence interval
* Outcomes per 100 population units
Bernard %>%
contingency(fate ~ treat)
Outcome
Predictor Dead Alive
Ibuprofen 84 140
Placebo 92 139
Outcome + Outcome - Total Inc risk * Odds
Exposed + 84 140 224 37.5 0.600
Exposed - 92 139 231 39.8 0.662
Total 176 279 455 38.7 0.631
Point estimates and 95% CIs:
-------------------------------------------------------------------
Inc risk ratio 0.94 (0.75, 1.19)
Odds ratio 0.91 (0.62, 1.32)
Attrib risk * -2.33 (-11.27, 6.62)
Attrib risk in population * -1.15 (-8.88, 6.59)
Attrib fraction in exposed (%) -6.20 (-33.90, 15.76)
Attrib fraction in population (%) -2.96 (-15.01, 7.82)
-------------------------------------------------------------------
Test that OR = 1: chi2(1) = 0.260 Pr>chi2 = 0.61
Wald confidence limits
CI: confidence interval
* Outcomes per 100 population units
Pearson's Chi-squared test with Yates' continuity correction
data: dat
X-squared = 0.17076, df = 1, p-value = 0.6794
data(oswego, package = "epitools")
oswego <- oswego %>%
mutate(
ill = factor(ill, labels = c("No", "Yes")),
sex = factor(sex, labels = c("Female", "Male")),
chocolate.ice.cream = factor(chocolate.ice.cream, labels = c("No", "Yes"))
) %>%
var_labels(
ill = "Developed illness",
sex = "Sex",
chocolate.ice.cream = "Consumed chocolate ice cream"
)
oswego %>%
mutate(
ill = relevel(ill, ref = "Yes"),
chocolate.ice.cream = relevel(chocolate.ice.cream, ref = "Yes")
) %>%
copy_labels(oswego) %>%
cross_tab(ill ~ sex + chocolate.ice.cream)
Developed illness | levels | Yes | No | Total |
---|---|---|---|---|
Total N (%) | 46 (61.3) | 29 (38.7) | 75 | |
Sex | Female | 30 (65.2) | 14 (48.3) | 44 (58.7) |
Male | 16 (34.8) | 15 (51.7) | 31 (41.3) | |
Consumed chocolate ice cream | Yes | 25 (55.6) | 22 (75.9) | 47 (63.5) |
No | 20 (44.4) | 7 (24.1) | 27 (36.5) |
oswego %>%
mhor(ill ~ sex/chocolate.ice.cream)
OR Lower.CI Upper.CI Pr(>|z|)
sexFemale:chocolate.ice.creamYes 0.47 0.11 2.06 0.318
sexMale:chocolate.ice.creamYes 0.24 0.05 1.13 0.072
Common OR Lower CI Upper CI Pr(>|z|)
Cochran-Mantel-Haenszel: 0.35 0.12 1.01 0.085
Test for effect modification (interaction): p = 0.5434
data(Oncho)
odds_trend(mf ~ agegrp, data = Oncho, angle = 0, hjust = 0.5)$fig
Freq <- c(1739, 8, 51, 22)
BCG <- gl(2, 1, 4, labels=c("Negative", "Positive"))
Xray <- gl(2, 2, labels=c("Negative", "Positive"))
tb <- data.frame(Freq, BCG, Xray)
tb <- expand_df(tb)
diag_test(BCG ~ Xray, data = tb)
Outcome + Outcome - Total
Test + 22 51 73
Test - 8 1739 1747
Total 30 1790 1820
Point estimates and 95 % CIs:
---------------------------------------------------------
Apparent prevalence 0.04 (0.03, 0.05)
True prevalence 0.02 (0.01, 0.02)
Sensitivity 0.73 (0.54, 0.88)
Specificity 0.97 (0.96, 0.98)
Positive predictive value 0.30 (0.20, 0.42)
Negative predictive value 1.00 (0.99, 1.00)
Positive likelihood ratio 25.74 (18.21, 36.38)
Negative likelihood ratio 0.27 (0.15, 0.50)
---------------------------------------------------------
diag_test2(22, 51, 8, 1739)
Outcome + Outcome - Total
Test + 22 51 73
Test - 8 1739 1747
Total 30 1790 1820
Point estimates and 95 % CIs:
---------------------------------------------------------
Apparent prevalence 0.04 (0.03, 0.05)
True prevalence 0.02 (0.01, 0.02)
Sensitivity 0.73 (0.54, 0.88)
Specificity 0.97 (0.96, 0.98)
Positive predictive value 0.30 (0.20, 0.42)
Negative predictive value 1.00 (0.99, 1.00)
Positive likelihood ratio 25.74 (18.21, 36.38)
Negative likelihood ratio 0.27 (0.15, 0.50)
---------------------------------------------------------
data(birthwt, package = "MASS")
birthwt <- birthwt %>%
mutate(smoke = factor(smoke, labels = c("Non-smoker", "Smoker")),
race = factor(race, labels = c("White", "African American", "Other"))) %>%
var_labels(bwt = 'Birth weight (g)',
smoke = 'Smoking status',
race = 'Race')
birthwt %>%
group_by(race, smoke) %>%
summarise(
n = n(),
Mean = mean(bwt, na.rm = TRUE),
SD = sd(bwt, na.rm = TRUE),
Median = median(bwt, na.rm = TRUE),
CV = rel_dis(bwt)
)
`summarise()` regrouping output by 'race' (override with `.groups` argument)
# A tibble: 6 x 7
# Groups: race [3]
race smoke n Mean SD Median CV
<fct> <fct> <int> <dbl> <dbl> <dbl> <dbl>
1 White Non-smoker 44 3429. 710. 3593 0.207
2 White Smoker 52 2827. 626. 2776. 0.222
3 African American Non-smoker 16 2854. 621. 2920 0.218
4 African American Smoker 10 2504 637. 2381 0.254
5 Other Non-smoker 55 2816. 709. 2807 0.252
6 Other Smoker 12 2757. 810. 3146. 0.294
birthwt %>%
gen_bst_df(bwt ~ race|smoke)
Birth weight (g) | LowerCI | UpperCI | Race | Smoking status |
---|---|---|---|---|
3428.75 | 3210.57 | 3621.60 | White | Non-smoker |
2826.85 | 2668.94 | 2992.67 | White | Smoker |
2854.50 | 2571.50 | 3131.50 | African American | Non-smoker |
2504.00 | 2124.40 | 2883.20 | African American | Smoker |
2815.78 | 2615.37 | 2988.95 | Other | Non-smoker |
2757.17 | 2254.79 | 3121.87 | Other | Smoker |
birthwt %>%
bar_error(bwt ~ race, fill = ~ smoke) %>%
axis_labs() %>%
gf_labs(fill = "Smoking status:")
No summary function supplied, defaulting to `mean_se()`
model_norm <- lm(bwt ~ smoke + race, data = birthwt)
model_norm %>% Anova() %>% tidy()
term | sumsq | df | statistic | p.value |
---|---|---|---|---|
smoke | 7322574.73 | 1.00 | 15.46 | 0.00 |
race | 8712354.03 | 2.00 | 9.20 | 0.00 |
Residuals | 87631355.83 | 185.00 |
model_norm %>% tidy()
term | estimate | std.error | statistic | p.value |
---|---|---|---|---|
(Intercept) | 3334.95 | 91.78 | 36.34 | 0.00 |
smokeSmoker | -428.73 | 109.04 | -3.93 | 0.00 |
raceAfrican American | -450.36 | 153.12 | -2.94 | 0.00 |
raceOther | -452.88 | 116.48 | -3.89 | 0.00 |
model_norm %>%
glm_coef(labels = model_labels(model_norm))
Parameter | Coefficient | Pr(>|t|) |
---|---|---|
Constant | 3334.95 (3036.11, 3633.78) | < 0.001 |
Smoking status: Smoker | -428.73 (-667.02, -190.44) | < 0.001 |
Race: African American | -450.36 (-750.31, -150.4) | 0.003 |
Race: Other | -452.88 (-775.17, -130.58) | 0.006 |
model_norm %>%
glm_coef(se_rob = TRUE, labels = model_labels(model_norm))
Parameter | Coefficient | Pr(>|t|) |
---|---|---|
Constant | 3334.95 (3036.11, 3633.78) | < 0.001 |
Smoking status: Smoker | -428.73 (-667.02, -190.44) | < 0.001 |
Race: African American | -450.36 (-750.31, -150.4) | 0.003 |
Race: Other | -452.88 (-775.17, -130.58) | 0.006 |
model_norm %>% glance()
r.squared | adj.r.squared | sigma | statistic | p.value | df | logLik | AIC | BIC | deviance | df.residual |
---|---|---|---|---|---|---|---|---|---|---|
0.12 | 0.11 | 688.25 | 8.68 | 0.00 | 4 | -1501.11 | 3012.22 | 3028.43 | 87631355.83 | 185 |
model_norm %>%
plot_model("pred", terms = ~race|smoke, dot.size = 1.5, title = "")
emmip(model_norm, smoke ~ race) %>%
gf_labs(y = get_label(birthwt$bwt), x = "", col = "Smoking status")
data(diet, package = "Epi")
diet <- diet %>%
mutate(
chd = factor(chd, labels = c("No CHD", "CHD"))
) %>%
var_labels(
chd = "Coronary Heart Disease",
fibre = "Fibre intake (10 g/day)"
)
diet %>% estat(~ fibre|chd)
Coronary Heart Disease | N | Min. | Max. | Mean | Median | SD | CV | |
---|---|---|---|---|---|---|---|---|
Fibre intake (10 g/day) | No CHD | 288.00 | 0.60 | 5.35 | 1.75 | 1.69 | 0.58 | 0.33 |
CHD | 45.00 | 0.76 | 2.43 | 1.49 | 1.51 | 0.40 | 0.27 |
diet %>%
gf_boxploth(chd ~ fibre, fill = "indianred3", alpha = 0.7) %>%
axis_labs()
Warning: Removed 4 rows containing non-finite values (stat_boxploth).
model_binom <- glm(chd ~ fibre, data = diet, family = binomial)
model_binom %>%
glm_coef(labels = model_labels(model_binom))
Parameter | Odds ratio | Pr(>|z|) |
---|---|---|
Constant | 0.95 (0.3, 3.01) | 0.93 |
Fibre intake (10 g/day) | 0.33 (0.16, 0.67) | 0.00 |
model_binom %>%
plot_model("pred", terms = "fibre [all]", title = "")
data(bdendo, package = "Epi")
bdendo <- bdendo %>%
mutate(
cancer = factor(d, labels = c('Control', 'Case')),
gall = factor(gall, labels = c("No GBD", "GBD")),
est = factor(est, labels = c("No oestrogen", "Oestrogen"))
) %>%
var_labels(
cancer = 'Endometrial cancer',
gall = 'Gall bladder disease',
est = 'Oestrogen'
)
bdendo %>%
mutate(
cancer = relevel(cancer, ref = "Case"),
est = relevel(est, ref = "Oestrogen"),
gall = relevel(gall, ref = "GBD")
) %>%
copy_labels(bdendo) %>%
cross_tab(cancer ~ est + gall)
Endometrial cancer | levels | Case | Control | Total |
---|---|---|---|---|
Total N (%) | 63 (20.0) | 252 (80.0) | 315 | |
Oestrogen | Oestrogen | 56 (88.9) | 127 (50.4) | 183 (58.1) |
No oestrogen | 7 (11.1) | 125 (49.6) | 132 (41.9) | |
Gall bladder disease | GBD | 17 (27.0) | 24 (9.5) | 41 (13.0) |
No GBD | 46 (73.0) | 228 (90.5) | 274 (87.0) |
library(survival)
model_clogit <- clogit(cancer == 'Case' ~ est * gall + strata(set), data = bdendo)
model_clogit %>%
glm_coef(labels = c("Oestrogen/No oestrogen", "GBD/No GBD",
"Oestrogen:GBD Interaction"))
Parameter | Odds ratio | Pr(>|z|) |
---|---|---|
Oestrogen/No oestrogen | 14.88 (4.49, 49.36) | < 0.001 |
GBD/No GBD | 18.07 (3.2, 102.01) | 0.001 |
Oestrogen:GBD Interaction | 0.13 (0.02, 0.9) | 0.039 |
require(ggeffects)
Loading required package: ggeffects
bdendo_pred <- ggemmeans(model_clogit, terms = c('gall', 'est'))
bdendo_pred %>%
gf_pointrange(predicted + conf.low + conf.high ~ x|group, col = ~ x) %>%
gf_labs(y = "P(cancer)", x = "", col = get_label(bdendo$gall))
library(ordinal)
Attaching package: 'ordinal'
The following object is masked from 'package:dplyr':
slice
data(housing, package = "MASS")
housing <- housing %>%
var_labels(
Sat = "Satisfaction",
Infl = "Perceived influence",
Type = "Type of rental",
Cont = "Contact"
)
model_clm <- clm(Sat ~ Infl + Type + Cont, weights = Freq, data = housing)
model_clm %>%
glm_coef(labels = model_labels(model_clm, intercept = FALSE))
Parameter | Ordinal OR | Pr(>|Z|) |
---|---|---|
Perceived influence: Low | 0.61 (0.48, 0.78) | < 0.001 |
Perceived influence: Medium | 2 (1.56, 2.55) | < 0.001 |
Contact: Low | 1.76 (1.44, 2.16) | < 0.001 |
Perceived influence: High | 3.63 (2.83, 4.66) | < 0.001 |
Contact: High | 0.56 (0.45, 0.71) | < 0.001 |
Type of rental: Apartment | 0.69 (0.51, 0.94) | 0.018 |
Type of rental: Atrium | 0.34 (0.25, 0.45) | < 0.001 |
Type of rental: Terrace | 1.43 (1.19, 1.73) | < 0.001 |
model_clm %>%
plot_model(type = "pred", terms = c("Infl", "Cont"),
dot.size = 1, title = "") %>%
gf_theme(axis.text.x = element_text(angle = 45, hjust = 1))
model_clm %>%
plot_model(type = "pred", terms = c("Infl", "Type"),
dot.size = 1, title = "") %>%
gf_theme(axis.text.x = element_text(angle = 45, hjust = 1))
emmip(model_clm, Infl ~ Type |Cont) %>%
gf_labs(x = "Type of rental", col = "Perceived influence")
data(quine, package = "MASS")
levels(quine$Eth) <- c("Aboriginal", "White")
levels(quine$Sex) <- c("Female", "Male")
quine <- quine %>%
var_labels(
Days = "Number of absent days",
Eth = "Ethnicity",
Age = "Age group"
)
EDA
quine %>%
group_by(Eth, Sex, Age) %>%
summarise(
n = n(),
Mean = mean(Days, na.rm = TRUE),
SD = sd(Days, na.rm = TRUE),
Median = median(Days, na.rm = TRUE),
CV = rel_dis(Days)
)
`summarise()` regrouping output by 'Eth', 'Sex' (override with `.groups` argument)
# A tibble: 16 x 8
# Groups: Eth, Sex [4]
Eth Sex Age n Mean SD Median CV
<fct> <fct> <fct> <int> <dbl> <dbl> <dbl> <dbl>
1 Aboriginal Female F0 5 17.6 17.4 11 0.987
2 Aboriginal Female F1 15 18.9 16.3 13 0.865
3 Aboriginal Female F2 9 32.6 27.3 20 0.839
4 Aboriginal Female F3 9 14.6 14.9 10 1.02
5 Aboriginal Male F0 8 11.5 7.23 12 0.629
6 Aboriginal Male F1 5 9.6 4.51 7 0.469
7 Aboriginal Male F2 11 30.9 17.8 32 0.576
8 Aboriginal Male F3 7 27.1 10.4 28 0.382
9 White Female F0 5 19.8 9.68 20 0.489
10 White Female F1 17 7.76 6.48 6 0.834
11 White Female F2 10 5.7 4.97 4 0.872
12 White Female F3 10 13.5 11.5 12 0.851
13 White Male F0 9 13.6 20.9 7 1.54
14 White Male F1 9 5.56 5.39 5 0.970
15 White Male F2 10 15.2 12.9 12 0.848
16 White Male F3 7 27.3 22.9 27 0.840
Model
model_pois <- glm(Days ~ Eth + Sex + Age, family = poisson, data = quine)
model_pois %>%
glm_coef(labels = model_labels(model_pois), se_rob = TRUE)
Parameter | Rate ratio | Pr(>|z|) |
---|---|---|
Constant | 17.66 (10.66, 29.24) | < 0.001 |
Ethnicity: White | 0.59 (0.39, 0.88) | 0.01 |
Sex: Male | 1.11 (0.77, 1.6) | 0.57 |
Age group: F1 | 0.8 (0.42, 1.5) | 0.482 |
Age group: F2 | 1.42 (0.85, 2.36) | 0.181 |
Age group: F3 | 1.35 (0.77, 2.34) | 0.292 |
model_pois %>% glance()
null.deviance | df.null | logLik | AIC | BIC | deviance | df.residual |
---|---|---|---|---|---|---|
2073.53 | 145 | -1165.49 | 2342.98 | 2360.88 | 1742.50 | 140 |
deviance(model_pois) / df.residual(model_pois)
[1] 12.44646
Thus, we have over-dispersion. One option is to use a negative binomial distribution.
library(MASS)
Attaching package: 'MASS'
The following objects are masked _by_ '.GlobalEnv':
birthwt, housing, quine
The following object is masked from 'package:dplyr':
select
model_negbin <- glm.nb(Days ~ Eth + Sex + Age, data = quine)
model_negbin %>%
glm_coef(labels = model_labels(model_negbin), se_rob = TRUE)
Parameter | Rate ratio | Pr(>|z|) |
---|---|---|
Constant | 20.24 (12.24, 33.47) | < 0.001 |
Ethnicity: White | 0.57 (0.38, 0.84) | 0.005 |
Sex: Male | 1.07 (0.74, 1.53) | 0.73 |
Age group: F1 | 0.69 (0.39, 1.23) | 0.212 |
Age group: F2 | 1.2 (0.7, 2.03) | 0.507 |
Age group: F3 | 1.29 (0.73, 2.28) | 0.385 |
model_negbin %>% glance()
null.deviance | df.null | logLik | AIC | BIC | deviance | df.residual |
---|---|---|---|---|---|---|
192.24 | 145 | -547.83 | 1109.65 | 1130.54 | 167.86 | 140 |
age group is a factor with more than two levels and is significant:
model_negbin %>% Anova()
LR Chisq | Df | Pr(>Chisq) |
---|---|---|
12.66 | 1.00 | 0.00 |
0.15 | 1.00 | 0.70 |
9.48 | 3.00 | 0.02 |
model_negbin %>%
plot_model(type = "pred", terms = c("Age", "Eth"),
dot.size = 1.5, title = "")
emmip(model_negbin, Eth ~ Age|Sex) %>%
gf_labs(y = "Number of absent days", x = "Age group", col = "Ethnicity")
## Adjusting CIs and p-values for multiple comparisons
multiple(model_negbin, ~ Age|Eth)$df
contrast | Eth | ratio | SE | z.ratio | p.value | lower.CL | upper.CL |
---|---|---|---|---|---|---|---|
F0 / F1 | Aboriginal | 1.44 | 0.34 | 1.57 | 0.40 | 0.79 | 2.62 |
F0 / F2 | Aboriginal | 0.84 | 0.19 | -0.77 | 0.86 | 0.46 | 1.52 |
F0 / F3 | Aboriginal | 0.78 | 0.19 | -1.04 | 0.72 | 0.42 | 1.45 |
F1 / F2 | Aboriginal | 0.58 | 0.12 | -2.65 | 0.04 | 0.34 | 0.98 |
F1 / F3 | Aboriginal | 0.54 | 0.12 | -2.89 | 0.02 | 0.31 | 0.93 |
F2 / F3 | Aboriginal | 0.93 | 0.20 | -0.34 | 0.99 | 0.53 | 1.63 |
F0 / F1 | White | 1.44 | 0.34 | 1.57 | 0.40 | 0.79 | 2.62 |
F0 / F2 | White | 0.84 | 0.19 | -0.77 | 0.86 | 0.46 | 1.52 |
F0 / F3 | White | 0.78 | 0.19 | -1.04 | 0.72 | 0.42 | 1.45 |
F1 / F2 | White | 0.58 | 0.12 | -2.65 | 0.04 | 0.34 | 0.98 |
F1 / F3 | White | 0.54 | 0.12 | -2.89 | 0.02 | 0.31 | 0.93 |
F2 / F3 | White | 0.93 | 0.20 | -0.34 | 0.99 | 0.53 | 1.63 |
multiple(model_negbin, ~ Age|Eth)$fig_ci %>%
gf_labs(x = "IRR")
multiple(model_negbin, ~ Age|Eth)$fig_ci %>%
gf_labs(x = "IRR")
effect of thiotepa versus placebo on the development of bladder cancer.
library(survival)
data(bladder)
bladder <- bladder %>%
mutate(times = stop,
rx = factor(rx, labels=c("Placebo", "Thiotepa"))
) %>%
var_labels(times = "Survival time",
rx = "Treatment")
model_surv <- survreg(Surv(times, event) ~ rx, data = bladder)
model_surv %>%
glm_coef(labels = c("Treatment: Thiotepa/Placebo", "Scale"), se_rob = TRUE)
Parameter | Survival time ratio | Pr(>|z|) |
---|---|---|
Treatment: Thiotepa/Placebo | 1.64 (0.89, 3.04) | 0.116 |
Scale | 1 (0.85, 1.18) | 0.992 |
model_exp <- survreg(Surv(times, event) ~ rx, data = bladder, dist = "exponential")
model_exp %>%
glm_coef(labels = c("Treatment: Thiotepa/Placebo"), se_rob = TRUE)
Parameter | Survival time ratio | Pr(>|z|) |
---|---|---|
Treatment: Thiotepa/Placebo | 1.64 (0.85, 3.16) | 0.139 |
Using naive standard errors
model_exp %>%
glm_coef(labels = c("Treatment: Thiotepa/Placebo"))
Parameter | Survival time ratio | Pr(>|z|) |
---|---|---|
Treatment: Thiotepa/Placebo | 1.64 (0.85, 3.16) | 0.139 |
model_exp %>%
plot_model(type = "pred", terms = ~ rx, dot.size = 1.5, title = "") %>%
gf_labs(y = "Survival time", x = "Treatment", title = "")
### Cox proportional hazards regression
model_cox <- coxph(Surv(times, event) ~ rx, data = bladder)
model_cox %>%
glm_coef(labels = c("Treatment: Thiotepa/Placebo"))
Parameter | Hazard ratio | Pr(>|z|) |
---|---|---|
Treatment: Thiotepa/Placebo | 0.64 (0.44, 0.94) | 0.02 |
model_cox %>%
plot_model(type = "pred", terms = ~ rx, dot.size = 1.5,
title = "") %>%
gf_labs(x = "Treatment", title = "")
relationship between sex and age on the distance from the pituitary to the pterygomaxillary fissure (mm).
library(nlme)
Attaching package: 'nlme'
The following objects are masked from 'package:ordinal':
ranef, VarCorr
The following object is masked from 'package:dplyr':
collapse
data(Orthodont)
Orthodont <- Orthodont %>%
var_labels(
distance = "Pituitary distance (mm)",
age = "Age (years)"
)
model_lme <- lme(distance ~ Sex * I(age - mean(age, na.rm = TRUE)), random = ~ 1|Subject,
method = "ML", data = Orthodont)
model_lme %>%
glm_coef(labels = c(
"Constant",
"Sex: female-male",
"Age (years)",
"Sex:Age interaction"
))
Parameter | Coefficient | Pr(>|t|) |
---|---|---|
Constant | 24.97 (24.03, 25.9) | < 0.001 |
Sex: female-male | -2.32 (-3.78, -0.86) | 0.005 |
Age (years) | 0.78 (0.63, 0.94) | < 0.001 |
Sex:Age interaction | -0.3 (-0.54, -0.07) | 0.015 |
model_lme %>%
plot_model("pred", terms = age ~ Sex,
show.data = TRUE, jitter = 0.1, dot.size = 1.5) %>%
gf_labs(y = get_label(Orthodont$distance), x = "Age (years)", title = "")