pacman::p_load(tidyverse,
gtsummary,
broom,
janitor)
theme_set(theme_minimal())
df <- read_csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vTMpianDs7wFsUux8WkyAw-RNahK97xrRbrFd87GQ2UuHj1euwReUdZrPKl-OFta3fBXrcVu5HO8W4D/pub?gid=0&single=true&output=csv")
Rows: 371 Columns: 24── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (6): Group, Start date, Follow-up date, GA during follow-up, Emergencies during follow-up, Mayor complications
dbl (18): Count, Patient code, Number of treatment visits, Number of treatments during follow-up, Number of complication treatment, Number of G...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(df)
df <- df %>% clean_names()
Abbreviated dataframe
df_short <- df %>%
select(group_id = group,
costs_of_program_v1,
costs_of_complications_v2)
AVERAGE of Costs and complications
group_stats <- df_short %>%
group_by(group_id) %>%
summarize(cost_program = mean(costs_of_program_v1),
cost_complications = mean(costs_of_complications_v2)
) %>%
mutate(across(where(is.numeric), round, 1))
Add the reference value from P2
df_short$P2_cost_v1 <- as.numeric("56.50")
df_short$p2_complications_v2 <- as.numeric("163.5")
Calculate the difference
df_short <- df_short %>%
mutate(v1 = costs_of_program_v1 - P2_cost_v1,
v2 = costs_of_complications_v2 - p2_complications_v2)
df_short %>%
tabyl(group_id) %>%
adorn_totals("row") %>%
adorn_pct_formatting()
group_id n percent
P1 58 15.6%
P2 60 16.2%
SDF1 65 17.5%
SDF2 65 17.5%
TF1 58 15.6%
TF2 65 17.5%
Total 371 100.0%
df_summary <- df_short %>%
group_by(group_id) %>%
summarize(mean_v1 = mean(v1),
mean_v2 = mean(v2),
sd_v1 = sd(v1),
sd_v2 = sd(v2),
n = n())
df_short %>%
select(group_id, v1, v2 ) %>%
group_by(group_id) %>%
summarise(n = n(),
v1_mean = mean(v1), v1_sd = sd(v1),
v2_mean = mean(v2), v2_sd = sd(v2) ) %>%
mutate(across(where(is.numeric), round, 1))
df_short %>%
select(group_id, v1, v2) %>%
gtsummary::tbl_summary(by = group_id) %>%
add_ci() %>%
add_p()
✖ `add_ci()` added mean CI for "v1"; however, no mean is shown in the `tbl_summary()` table.
✖ `add_ci()` added mean CI for "v1"; however, no mean is shown in the `tbl_summary()` table.
✖ `add_ci()` added mean CI for "v1"; however, no mean is shown in the `tbl_summary()` table.
✖ `add_ci()` added mean CI for "v1"; however, no mean is shown in the `tbl_summary()` table.
✖ `add_ci()` added mean CI for "v1"; however, no mean is shown in the `tbl_summary()` table.
✖ `add_ci()` added mean CI for "v1"; however, no mean is shown in the `tbl_summary()` table.
✖ `add_ci()` added mean CI for "v2"; however, no mean is shown in the `tbl_summary()` table.
✖ `add_ci()` added mean CI for "v2"; however, no mean is shown in the `tbl_summary()` table.
✖ `add_ci()` added mean CI for "v2"; however, no mean is shown in the `tbl_summary()` table.
✖ `add_ci()` added mean CI for "v2"; however, no mean is shown in the `tbl_summary()` table.
✖ `add_ci()` added mean CI for "v2"; however, no mean is shown in the `tbl_summary()` table.
✖ `add_ci()` added mean CI for "v2"; however, no mean is shown in the `tbl_summary()` table.
| Characteristic | P1, N = 581 | 95% CI2 | P2, N = 601 | 95% CI2 | SDF1, N = 651 | 95% CI2 | SDF2, N = 651 | 95% CI2 | TF1, N = 581 | 95% CI2 | TF2, N = 651 | 95% CI2 | p-value3 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| v1 | 64 (41, 64) | 43, 59 | 4 (4, 4) | -5.0, 5.0 | 72 (72, 72) | 56, 74 | 8 (-24, 8) | -3.1, 8.2 | 70 (70, 70) | 57, 70 | 6 (6, 6) | -2.4, 6.7 | <0.001 |
| v2 | -164 (-164, 109) | -54, 88 | -164 (-164, 89) | -64, 64 | -164 (-164, 56) | -51, 95 | -164 (-164, -164) | -136, -57 | -128 (-164, 152) | -53, 75 | -94 (-164, -14) | -86, 0.52 | 0.011 |
| 1 Median (IQR) | |||||||||||||
| 2 CI = Confidence Interval | |||||||||||||
| 3 Kruskal-Wallis rank sum test | |||||||||||||
Ratio V2/V1
df_short <- df_short %>%
mutate(ratio_V2_v1 = v2/v1)
df_short %>%
# mutate(ratio_V2_v1 = v2/v1) %>%
select(group_id,
v1, v2, ratio_V2_v1) %>%
gtsummary::tbl_summary(by = group_id) %>%
add_ci() %>%
add_p()
✖ `add_ci()` added mean CI for "v1"; however, no mean is shown in the `tbl_summary()` table.
✖ `add_ci()` added mean CI for "v1"; however, no mean is shown in the `tbl_summary()` table.
✖ `add_ci()` added mean CI for "v1"; however, no mean is shown in the `tbl_summary()` table.
✖ `add_ci()` added mean CI for "v1"; however, no mean is shown in the `tbl_summary()` table.
✖ `add_ci()` added mean CI for "v1"; however, no mean is shown in the `tbl_summary()` table.
✖ `add_ci()` added mean CI for "v1"; however, no mean is shown in the `tbl_summary()` table.
✖ `add_ci()` added mean CI for "v2"; however, no mean is shown in the `tbl_summary()` table.
✖ `add_ci()` added mean CI for "v2"; however, no mean is shown in the `tbl_summary()` table.
✖ `add_ci()` added mean CI for "v2"; however, no mean is shown in the `tbl_summary()` table.
✖ `add_ci()` added mean CI for "v2"; however, no mean is shown in the `tbl_summary()` table.
✖ `add_ci()` added mean CI for "v2"; however, no mean is shown in the `tbl_summary()` table.
✖ `add_ci()` added mean CI for "v2"; however, no mean is shown in the `tbl_summary()` table.
✖ `add_ci()` added mean CI for "ratio_V2_v1"; however, no mean is shown in the `tbl_summary()` table.
✖ `add_ci()` added mean CI for "ratio_V2_v1"; however, no mean is shown in the `tbl_summary()` table.
✖ `add_ci()` added mean CI for "ratio_V2_v1"; however, no mean is shown in the `tbl_summary()` table.
✖ `add_ci()` added mean CI for "ratio_V2_v1"; however, no mean is shown in the `tbl_summary()` table.
✖ `add_ci()` added mean CI for "ratio_V2_v1"; however, no mean is shown in the `tbl_summary()` table.
✖ `add_ci()` added mean CI for "ratio_V2_v1"; however, no mean is shown in the `tbl_summary()` table.
| Characteristic | P1, N = 581 | 95% CI2 | P2, N = 601 | 95% CI2 | SDF1, N = 651 | 95% CI2 | SDF2, N = 651 | 95% CI2 | TF1, N = 581 | 95% CI2 | TF2, N = 651 | 95% CI2 | p-value3 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| v1 | 64 (41, 64) | 43, 59 | 4 (4, 4) | -5.0, 5.0 | 72 (72, 72) | 56, 74 | 8 (-24, 8) | -3.1, 8.2 | 70 (70, 70) | 57, 70 | 6 (6, 6) | -2.4, 6.7 | <0.001 |
| v2 | -164 (-164, 109) | -54, 88 | -164 (-164, 89) | -64, 64 | -164 (-164, 56) | -51, 95 | -164 (-164, -164) | -136, -57 | -128 (-164, 152) | -53, 75 | -94 (-164, -14) | -86, 0.52 | 0.011 |
| ratio_V2_v1 | -2 (-3, 5) | -2.5, 20 | -12 (-47, 6) | -15, 13 | -2 (-2, 0) | -0.95, 11 | -22 (-22, 2) | -14, -3.6 | -2 (-2, 2) | -3.3, 3.5 | -4 (-25, 7) | -9.0, 1.7 | <0.001 |
| 1 Median (IQR) | |||||||||||||
| 2 CI = Confidence Interval | |||||||||||||
| 3 Kruskal-Wallis rank sum test | |||||||||||||
df_short %>%
ggplot(aes(x = ratio_V2_v1)) +
geom_histogram(bins = 10) +
scale_x_log10() +
facet_grid(group_id ~ .) +
labs(title = "Distribution of averted costs compared to P2",
y = "Count",
x = "Averted costs")
df_short %>%
filter(group_id != "P2") %>%
ggplot(aes(x = fct_reorder(group_id,ratio_V2_v1) ,
y = ratio_V2_v1)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(alpha = .3, width = .2)
df_short %>%
separate(group_id, into = c("compound", "freq"), sep = "(?<=\\D)(?=\\d)", convert = TRUE) %>%
group_by(compound, freq) %>%
summarize(mean_ratio = mean(ratio_V2_v1)) %>%
ggplot(aes(x = as.factor(freq),
y = mean_ratio,
group = compound,
color = compound)) +
geom_line() +
geom_point() +
labs(title = "Interaction plot: compound vs frequency",
y = "Mean ratio v2/v1",
x = "Frequency",
color = "Compound")
`summarise()` has grouped output by 'compound'. You can override using the `.groups` argument.
df_short %>%
separate(group_id, into = c("compound", "freq"), sep = "(?<=\\D)(?=\\d)", convert = TRUE) %>%
mutate(freq = as.factor(freq)) %>%
with(glm(ratio_V2_v1 ~ compound + freq + compound*freq )) %>%
gtsummary::tbl_regression()
| Characteristic | Beta | 95% CI1 | p-value |
|---|---|---|---|
| compound | |||
| P | — | — | |
| SDF | -3.9 | -15, 7.7 | 0.5 |
| TF | -8.9 | -21, 3.0 | 0.14 |
| freq | |||
| 1 | — | — | |
| 2 | -10 | -22, 1.8 | 0.10 |
| compound * freq | |||
| SDF * 2 | -3.7 | -20, 13 | 0.7 |
| TF * 2 | 6.3 | -10, 23 | 0.5 |
| 1 CI = Confidence Interval | |||
df_short %>%
separate(group_id, into = c("compound", "freq"), sep = "(?<=\\D)(?=\\d)", convert = TRUE) %>%
mutate(freq = as.factor(freq)) %>%
with(glm(ratio_V2_v1 ~ compound + freq + compound*freq )) %>%
summary()
Call:
glm(formula = ratio_V2_v1 ~ compound + freq + compound * freq)
Deviance Residuals:
Min 1Q Median 3Q Max
-55.664 -13.211 -5.832 4.260 230.050
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 8.950 4.276 2.093 0.0370 *
compoundSDF -3.866 5.882 -0.657 0.5114
compoundTF -8.872 6.047 -1.467 0.1432
freq2 -9.956 5.997 -1.660 0.0977 .
compoundSDF:freq2 -3.717 8.282 -0.449 0.6538
compoundTF:freq2 6.256 8.400 0.745 0.4569
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 1060.544)
Null deviance: 399134 on 370 degrees of freedom
Residual deviance: 387099 on 365 degrees of freedom
AIC: 3645.4
Number of Fisher Scoring iterations: 2