# install.packages("pacman")
# writeLines(pacman::p_lib(), "~/Desktop/list_of_R_packages.csv") # to quickly back up packages
# remotes::install_github("ThinkR-open/remedy")
pacman::p_load(tidyverse,ggplot2, janitor, data.table, here, rio, kableExtra, ggstatsplot,
knitr,
rmarkdown,
webshot,
namer
)
#
# install.packages("easystats", repos = "https://easystats.r-universe.dev")
#
# easystats::install_suggested()
#Knit to pdf
# knit2pdf(input = "cages.Rmd", here::here("output", "cages.Rmd"))
#
# render("cages.Rmd", output_format = "pdf_document")
pea_data <- rio::import(here("data", "cage_data.xlsx"), which = 1) %>%
janitor::clean_names() %>%
dplyr::mutate(edge_d = as.factor(edge_d)) %>%
dplyr::mutate(mesh = as.factor(fence))
head(pea_data)
## cage fence edge_d hole_d height n_plants pods pod_mass mesh
## 1 HR56 big 0 1.8 63 60 260 52 big
## 2 HR53 big 0 2.6 50 41 325 70 big
## 3 HR50 big 0 5.6 90 97 566 140 big
## 4 HR47 big 0 10.8 90 104 591 146 big
## 5 HR44 big 0 0.4 50 28 54 14 big
## 6 HR32 big 0 4.2 50 57 400 82 big
# Function to summarise
baseline_table <- function(data, variables, grouping_var) {
data %>%
group_by(!!!syms(grouping_var)) %>%
summarise(
across(
all_of(variables),
~ paste0(mean(.) %>% round(2), "(±", sd(.) %>% round(2), ")")
)
) %>% unite(
"grouping",
all_of(grouping_var)
) %>% pivot_longer(
cols = -"grouping",
names_to = "variables"
) %>% pivot_wider(
names_from = "grouping"
)
}
baseline_table(
pea_data,
variables = c("height", "n_plants", "pods", "pod_mass"),
grouping_var = "mesh"
) %>%
kbl(caption = "Means and SD for each mesh size") %>%
kable_styling(bootstrap_options = c("striped", "hover"))
| variables | big | middle | small |
|---|---|---|---|
| height | 69.22(±16.75) | 73.75(±10.61) | 80.56(±10.74) |
| n_plants | 71.78(±27.12) | 76.5(±20.02) | 94.89(±10.07) |
| pods | 382.22(±163.04) | 366.25(±106.57) | 474.67(±101.75) |
| pod_mass | 90.22(±44.47) | 87.75(±30.84) | 118.44(±34.81) |
Welch’s ANOVA with the Games-Howell post hoc test. https://statisticsbyjim.com/anova/welchs-anova-compared-to-classic-one-way-anova/
The ANOVA was significant but there were no significant pairwise contrasts (!). The Games-Howell post-hoc test involves a correction for multiple comparisons and makes the test more conservative.
ggplot(data = pea_data, aes(x = mesh, y = height, color=mesh)) +
geom_boxplot()+
scale_color_brewer(palette="Dark2") +
geom_jitter(shape=16, position=position_jitter(0.2))
# since the confidence intervals for the effect sizes are computed using
## bootstrapping, important to set a seed for reproducibility
set.seed(123)
ggbetweenstats(data = pea_data,
x= mesh,
y = height,
pairwise.display = "all",
title = "Distribution of plant height across mesh mesh size")
## for reproducibility
set.seed(123)
ggbetweenstats(data = pea_data,
x= mesh,
y = height) %>%
extract_stats()
## $subtitle_data
## # A tibble: 1 × 14
## statistic df df.er…¹ p.value method effec…² estim…³ conf.…⁴ conf.…⁵ conf.…⁶
## <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1.65 2 15.0 0.225 One-w… Omega2 0.0676 0.95 0 1
## # … with 4 more variables: conf.method <chr>, conf.distribution <chr>,
## # n.obs <int>, expression <list>, and abbreviated variable names ¹df.error,
## # ²effectsize, ³estimate, ⁴conf.level, ⁵conf.low, ⁶conf.high
##
## $caption_data
## # A tibble: 6 × 17
## term pd prior…¹ prior…² prior…³ bf10 method log_e…⁴ effec…⁵ estim…⁶
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl>
## 1 mu 1 cauchy 0 0.707 0.507 Bayes… -0.679 Bayesi… 0
## 2 mesh-big 0.902 cauchy 0 0.707 0.507 Bayes… -0.679 Bayesi… 0
## 3 mesh-middle 0.573 cauchy 0 0.707 0.507 Bayes… -0.679 Bayesi… 0
## 4 mesh-small 0.929 cauchy 0 0.707 0.507 Bayes… -0.679 Bayesi… 0
## 5 sig2 1 cauchy 0 0.707 0.507 Bayes… -0.679 Bayesi… 0
## 6 g_mesh 1 cauchy 0 0.707 0.507 Bayes… -0.679 Bayesi… 0
## # … with 7 more variables: std.dev <dbl>, conf.level <dbl>, conf.low <dbl>,
## # conf.high <dbl>, conf.method <chr>, n.obs <int>, expression <list>, and
## # abbreviated variable names ¹prior.distribution, ²prior.location,
## # ³prior.scale, ⁴log_e_bf10, ⁵effectsize, ⁶estimate
##
## $pairwise_comparisons_data
## # A tibble: 3 × 9
## group1 group2 statistic p.value alternative distrib…¹ p.adj…² test expression
## <chr> <chr> <dbl> <dbl> <chr> <chr> <chr> <chr> <list>
## 1 big middle 0.952 0.820 two.sided q Holm Game… <language>
## 2 big small 2.42 0.712 two.sided q Holm Game… <language>
## 3 middle small 1.86 0.820 two.sided q Holm Game… <language>
## # … with abbreviated variable names ¹distribution, ²p.adjust.method
##
## $descriptive_data
## NULL
##
## $one_sample_data
## NULL
##
## $tidy_data
## NULL
##
## $glance_data
## NULL
Welch’s ANOVA with the Games-Howell post hoc test. https://statisticsbyjim.com/anova/welchs-anova-compared-to-classic-one-way-anova/
ggplot(data = pea_data, aes(x = mesh, y = n_plants, color=mesh))+
geom_boxplot()+
scale_color_brewer(palette="Dark2") +
geom_jitter(shape=16, position=position_jitter(0.2))
## for reproducibility
set.seed(123)
ggbetweenstats( data = pea_data,
x= mesh,
y = n_plants,
pairwise.display = "all",
title = "Distribution of number of plants across mesh mesh size")
## for reproducibility
set.seed(123)
ggbetweenstats(data = pea_data,
x= mesh,
y = n_plants) %>%
extract_stats()
## $subtitle_data
## # A tibble: 1 × 14
## statistic df df.er…¹ p.value method effec…² estim…³ conf.…⁴ conf.…⁵ conf.…⁶
## <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 4.65 2 12.7 0.0304 One-w… Omega2 0.317 0.95 0 1
## # … with 4 more variables: conf.method <chr>, conf.distribution <chr>,
## # n.obs <int>, expression <list>, and abbreviated variable names ¹df.error,
## # ²effectsize, ³estimate, ⁴conf.level, ⁵conf.low, ⁶conf.high
##
## $caption_data
## # A tibble: 6 × 17
## term pd prior…¹ prior…² prior…³ bf10 method log_e…⁴ effec…⁵ estim…⁶
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl>
## 1 mu 1 cauchy 0 0.707 1.36 Bayes… 0.311 Bayesi… 0.0502
## 2 mesh-big 0.930 cauchy 0 0.707 1.36 Bayes… 0.311 Bayesi… 0.0502
## 3 mesh-middle 0.762 cauchy 0 0.707 1.36 Bayes… 0.311 Bayesi… 0.0502
## 4 mesh-small 0.984 cauchy 0 0.707 1.36 Bayes… 0.311 Bayesi… 0.0502
## 5 sig2 1 cauchy 0 0.707 1.36 Bayes… 0.311 Bayesi… 0.0502
## 6 g_mesh 1 cauchy 0 0.707 1.36 Bayes… 0.311 Bayesi… 0.0502
## # … with 7 more variables: std.dev <dbl>, conf.level <dbl>, conf.low <dbl>,
## # conf.high <dbl>, conf.method <chr>, n.obs <int>, expression <list>, and
## # abbreviated variable names ¹prior.distribution, ²prior.location,
## # ³prior.scale, ⁴log_e_bf10, ⁵effectsize, ⁶estimate
##
## $pairwise_comparisons_data
## # A tibble: 3 × 9
## group1 group2 statistic p.value alternative distrib…¹ p.adj…² test expression
## <chr> <chr> <dbl> <dbl> <chr> <chr> <chr> <chr> <list>
## 1 big middle 0.582 0.912 two.sided q Holm Game… <language>
## 2 big small 3.39 0.260 two.sided q Holm Game… <language>
## 3 middle small 3.32 0.260 two.sided q Holm Game… <language>
## # … with abbreviated variable names ¹distribution, ²p.adjust.method
##
## $descriptive_data
## NULL
##
## $one_sample_data
## NULL
##
## $tidy_data
## NULL
##
## $glance_data
## NULL
Welch’s ANOVA with the Games-Howell post hoc test. https://statisticsbyjim.com/anova/welchs-anova-compared-to-classic-one-way-anova/
ggplot(data = pea_data, aes(x = mesh, y = pods, color=mesh))+
geom_boxplot()+
scale_color_brewer(palette="Dark2") +
geom_jitter(shape=16, position=position_jitter(0.2))
## for reproducibility
set.seed(123)
ggbetweenstats( data = pea_data,
x= mesh,
y = pods,
pairwise.display = "s",
title = "Distribution of number of pods across mesh mesh size")
## for reproducibility
set.seed(123)
ggbetweenstats(data = pea_data,
x= mesh,
y = pods) %>%
extract_stats()
## $subtitle_data
## # A tibble: 1 × 14
## statistic df df.er…¹ p.value method effec…² estim…³ conf.…⁴ conf.…⁵ conf.…⁶
## <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 2.45 2 14.9 0.121 One-w… Omega2 0.139 0.95 0 1
## # … with 4 more variables: conf.method <chr>, conf.distribution <chr>,
## # n.obs <int>, expression <list>, and abbreviated variable names ¹df.error,
## # ²effectsize, ³estimate, ⁴conf.level, ⁵conf.low, ⁶conf.high
##
## $caption_data
## # A tibble: 6 × 17
## term pd prior…¹ prior…² prior…³ bf10 method log_e…⁴ effec…⁵ estim…⁶
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl>
## 1 mu 1 cauchy 0 0.707 0.551 Bayes… -0.596 Bayesi… 0
## 2 mesh-big 0.745 cauchy 0 0.707 0.551 Bayes… -0.596 Bayesi… 0
## 3 mesh-middle 0.840 cauchy 0 0.707 0.551 Bayes… -0.596 Bayesi… 0
## 4 mesh-small 0.949 cauchy 0 0.707 0.551 Bayes… -0.596 Bayesi… 0
## 5 sig2 1 cauchy 0 0.707 0.551 Bayes… -0.596 Bayesi… 0
## 6 g_mesh 1 cauchy 0 0.707 0.551 Bayes… -0.596 Bayesi… 0
## # … with 7 more variables: std.dev <dbl>, conf.level <dbl>, conf.low <dbl>,
## # conf.high <dbl>, conf.method <chr>, n.obs <int>, expression <list>, and
## # abbreviated variable names ¹prior.distribution, ²prior.location,
## # ³prior.scale, ⁴log_e_bf10, ⁵effectsize, ⁶estimate
##
## $pairwise_comparisons_data
## # A tibble: 3 × 9
## group1 group2 statistic p.value alternative distrib…¹ p.adj…² test expression
## <chr> <chr> <dbl> <dbl> <chr> <chr> <chr> <chr> <list>
## 1 big middle -0.342 0.968 two.sided q Holm Game… <language>
## 2 big small 2.04 0.696 two.sided q Holm Game… <language>
## 3 middle small 3.02 0.349 two.sided q Holm Game… <language>
## # … with abbreviated variable names ¹distribution, ²p.adjust.method
##
## $descriptive_data
## NULL
##
## $one_sample_data
## NULL
##
## $tidy_data
## NULL
##
## $glance_data
## NULL
ggplot(data = pea_data, aes(x = mesh, y = pod_mass, color=mesh))+
geom_boxplot()+
scale_color_brewer(palette="Dark2") +
geom_jitter(shape=16, position=position_jitter(0.2))
## for reproducibility
set.seed(123)
ggbetweenstats( data = pea_data, x= mesh, y = pod_mass, title = "Distribution of pod mass across mesh mesh size")
## for reproducibility
set.seed(123)
ggbetweenstats(data = pea_data,
x= mesh,
y = pod_mass) %>%
extract_stats()
## $subtitle_data
## # A tibble: 1 × 14
## statistic df df.er…¹ p.value method effec…² estim…³ conf.…⁴ conf.…⁵ conf.…⁶
## <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 2.02 2 15.2 0.167 One-w… Omega2 0.101 0.95 0 1
## # … with 4 more variables: conf.method <chr>, conf.distribution <chr>,
## # n.obs <int>, expression <list>, and abbreviated variable names ¹df.error,
## # ²effectsize, ³estimate, ⁴conf.level, ⁵conf.low, ⁶conf.high
##
## $caption_data
## # A tibble: 6 × 17
## term pd prior…¹ prior…² prior…³ bf10 method log_e…⁴ effec…⁵ estim…⁶
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl>
## 1 mu 1 cauchy 0 0.707 0.548 Bayes… -0.602 Bayesi… 0
## 2 mesh-big 0.776 cauchy 0 0.707 0.548 Bayes… -0.602 Bayesi… 0
## 3 mesh-middle 0.818 cauchy 0 0.707 0.548 Bayes… -0.602 Bayesi… 0
## 4 mesh-small 0.949 cauchy 0 0.707 0.548 Bayes… -0.602 Bayesi… 0
## 5 sig2 1 cauchy 0 0.707 0.548 Bayes… -0.602 Bayesi… 0
## 6 g_mesh 1 cauchy 0 0.707 0.548 Bayes… -0.602 Bayesi… 0
## # … with 7 more variables: std.dev <dbl>, conf.level <dbl>, conf.low <dbl>,
## # conf.high <dbl>, conf.method <chr>, n.obs <int>, expression <list>, and
## # abbreviated variable names ¹prior.distribution, ²prior.location,
## # ³prior.scale, ⁴log_e_bf10, ⁵effectsize, ⁶estimate
##
## $pairwise_comparisons_data
## # A tibble: 3 × 9
## group1 group2 statistic p.value alternative distrib…¹ p.adj…² test expression
## <chr> <chr> <dbl> <dbl> <chr> <chr> <chr> <chr> <list>
## 1 big middle -0.190 0.990 two.sided q Holm Game… <language>
## 2 big small 2.12 0.638 two.sided q Holm Game… <language>
## 3 middle small 2.73 0.496 two.sided q Holm Game… <language>
## # … with abbreviated variable names ¹distribution, ²p.adjust.method
##
## $descriptive_data
## NULL
##
## $one_sample_data
## NULL
##
## $tidy_data
## NULL
##
## $glance_data
## NULL
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = pea_data,
x= hole_d,
y = height,
grouping.var = mesh,
xlab = "Distance to nearest hole (m)",
ylab= "Plant height",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Plant height relative to distance to nearest hole, grouped by mesh size"
),
plotgrid.args = list(nrow = 3, ncol = 1))
## Registered S3 method overwritten by 'ggside':
## method from
## +.gg ggplot2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
mod <- lm(height ~ hole_d + mesh, data = pea_data)
summary(mod)
##
## Call:
## lm(formula = height ~ hole_d + mesh, data = pea_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.8838 -8.0613 0.9158 8.0544 20.6226
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 59.6010 5.4761 10.884 2.53e-10 ***
## hole_d 1.7458 0.6912 2.526 0.0192 *
## meshmiddle 3.8270 5.7417 0.667 0.5120
## meshsmall 11.8571 5.5676 2.130 0.0446 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.8 on 22 degrees of freedom
## Multiple R-squared: 0.3248, Adjusted R-squared: 0.2327
## F-statistic: 3.527 on 3 and 22 DF, p-value: 0.03171
coef(mod)
## (Intercept) hole_d meshmiddle meshsmall
## 59.601010 1.745784 3.827039 11.857069
Pearson’s correlation test revealed that, across the 9 cages with big mesh size, the distance to the nearest hole (m) was positively correlated with plant height, and this effect was statistically significant. The effect size (r=0.81) was large, as per Cohen’s (1988) conventions. The Bayes Factor for the same analysis revealed that the data were 7.14 times more probable under the alternative hypothesis as compared to the null hypothesis. This can be considered as moderate evidence (Jeffreys, 1961) in favor of the alternative hypothesis (of a correlation between these two variables).
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = pea_data,
x= hole_d,
y = n_plants,
grouping.var = mesh,
xlab = "Distance to nearest hole (m)",
ylab= "Number of plants",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Number of plants relative to distance to nearest hole, grouped by mesh size"
),
plotgrid.args = list(nrow = 3, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = pea_data,
x= hole_d,
y = pods,
grouping.var = mesh,
xlab = "Distance to nearest hole (m)",
ylab= "Number of pods",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Number of pods relative to distance to nearest hole, grouped by mesh size"
),
plotgrid.args = list(nrow = 3, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = pea_data,
x= hole_d,
y = pod_mass,
grouping.var = mesh,
xlab = "Distance to nearest hole (m)",
ylab= "Pod mass (g)",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Pod mass relative to distance to nearest hole, grouped by mesh size"
),
plotgrid.args = list(nrow = 3, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Mesh big and middle merged to have accessible not accessible
bi_pea <-pea_data %>%
dplyr::mutate(mesh = recode(mesh, big = "accessible", middle = "accessible", small = "not accessible"))
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = bi_pea,
x= hole_d,
y = height,
grouping.var = mesh,
xlab = "Distance to nearest hole (m)",
ylab= "Plant height",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Plant height relative to distance to nearest hole, grouped by mesh size"
),
plotgrid.args = list(nrow = 3, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
mod <- lm(height ~ hole_d + mesh, data = bi_pea)
summary(mod)
##
## Call:
## lm(formula = height ~ hole_d + mesh, data = bi_pea)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.0282 -6.0630 0.7847 9.3401 18.8239
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 61.275 4.807 12.747 6.56e-12 ***
## hole_d 1.768 0.682 2.593 0.0163 *
## meshnot accessible 10.067 4.818 2.090 0.0479 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.66 on 23 degrees of freedom
## Multiple R-squared: 0.3111, Adjusted R-squared: 0.2512
## F-statistic: 5.194 on 2 and 23 DF, p-value: 0.01376
coef(mod)
## (Intercept) hole_d meshnot accessible
## 61.275087 1.768045 10.066992
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = bi_pea,
x= hole_d,
y = n_plants,
grouping.var = mesh,
xlab = "Distance to nearest hole (m)",
ylab= "Number of plants",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Number of plants relative to distance to nearest hole, grouped by mesh size"
),
plotgrid.args = list(nrow = 3, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
mod <- lm(n_plants ~ hole_d + mesh, data = bi_pea)
summary(mod)
##
## Call:
## lm(formula = n_plants ~ hole_d + mesh, data = bi_pea)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.1588 -12.2967 -0.7523 13.2795 25.8317
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 55.8877 7.0074 7.976 4.52e-08 ***
## hole_d 3.1776 0.9942 3.196 0.00401 **
## meshnot accessible 22.4424 7.0231 3.196 0.00402 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17 on 23 degrees of freedom
## Multiple R-squared: 0.4538, Adjusted R-squared: 0.4063
## F-statistic: 9.553 on 2 and 23 DF, p-value: 0.000955
coef(mod)
## (Intercept) hole_d meshnot accessible
## 55.887729 3.177591 22.442378
# Distance effect only for accessible cages
acc_pea <- bi_pea %>%
dplyr::filter(mesh == "accessible")
mod1 <- lm(n_plants ~ hole_d, data = acc_pea)
summary(mod1)
##
## Call:
## lm(formula = n_plants ~ hole_d, data = acc_pea)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.997 -14.737 -1.673 9.545 28.933
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 44.586 8.269 5.392 7.48e-05 ***
## hole_d 5.160 1.266 4.078 0.00099 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.67 on 15 degrees of freedom
## Multiple R-squared: 0.5257, Adjusted R-squared: 0.4941
## F-statistic: 16.63 on 1 and 15 DF, p-value: 0.0009902
coef(mod1)
## (Intercept) hole_d
## 44.586265 5.160304
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = bi_pea,
x= hole_d,
y = pods,
grouping.var = mesh,
xlab = "Distance to nearest hole (m)",
ylab= "Number of pods",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Number of pods relative to distance to nearest hole, grouped by mesh size"
),
plotgrid.args = list(nrow = 3, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
mod <- lm(pods ~ hole_d + mesh, data = bi_pea)
summary(mod)
##
## Call:
## lm(formula = pods ~ hole_d + mesh, data = bi_pea)
##
## Residuals:
## Min 1Q Median 3Q Max
## -270.367 -91.308 6.219 93.261 192.244
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 320.568 50.839 6.306 1.96e-06 ***
## hole_d 9.498 7.213 1.317 0.2009
## meshnot accessible 104.604 50.953 2.053 0.0516 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 123.3 on 23 degrees of freedom
## Multiple R-squared: 0.1958, Adjusted R-squared: 0.1259
## F-statistic: 2.801 on 2 and 23 DF, p-value: 0.08155
coef(mod)
## (Intercept) hole_d meshnot accessible
## 320.567996 9.497875 104.604190
# Distance effect only for accessible cages
acc_pea <- bi_pea %>%
dplyr::filter(mesh == "accessible")
mod1 <- lm(pods ~ hole_d, data = acc_pea)
summary(mod1)
##
## Call:
## lm(formula = pods ~ hole_d, data = acc_pea)
##
## Residuals:
## Min 1Q Median 3Q Max
## -176.935 -38.798 -8.912 56.525 194.007
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 220.084 52.137 4.221 0.00074 ***
## hole_d 27.127 7.979 3.400 0.00396 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 105.1 on 15 degrees of freedom
## Multiple R-squared: 0.4352, Adjusted R-squared: 0.3975
## F-statistic: 11.56 on 1 and 15 DF, p-value: 0.003961
coef(mod1)
## (Intercept) hole_d
## 220.08410 27.12663
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = bi_pea,
x= hole_d,
y = pod_mass,
grouping.var = mesh,
xlab = "Distance to nearest hole (m)",
ylab= "Pod mass (g)",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Pod mass relative to distance to nearest hole, grouped by mesh size"
),
plotgrid.args = list(nrow = 3, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
mod <- lm(pod_mass ~ hole_d + mesh, data = bi_pea)
summary(mod)
##
## Call:
## lm(formula = pod_mass ~ hole_d + mesh, data = bi_pea)
##
## Residuals:
## Min 1Q Median 3Q Max
## -63.367 -27.277 -4.439 25.866 51.171
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 75.950 15.052 5.046 4.16e-05 ***
## hole_d 2.300 2.135 1.077 0.2927
## meshnot accessible 30.510 15.085 2.022 0.0549 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 36.51 on 23 degrees of freedom
## Multiple R-squared: 0.1778, Adjusted R-squared: 0.1063
## F-statistic: 2.486 on 2 and 23 DF, p-value: 0.1053
coef(mod)
## (Intercept) hole_d meshnot accessible
## 75.950050 2.299785 30.509960
Repeating the analyses just for cages located closest to the edge, given that susliks don’t venture deeper than that in the field.
pea0 <- pea_data %>%
dplyr::filter(edge_d == "0")
Welch’s ANOVA with the Games-Howell post hoc test. https://statisticsbyjim.com/anova/welchs-anova-compared-to-classic-one-way-anova/
ggplot(data = pea0, aes(x = mesh, y = height, color=mesh)) +
geom_boxplot()+
scale_color_brewer(palette="Dark2") +
geom_jitter(shape=16, position=position_jitter(0.2))
# since the confidence intervals for the effect sizes are computed using
## bootstrapping, important to set a seed for reproducibility
set.seed(123)
ggbetweenstats(data = pea0,
x= mesh,
y = height,
pairwise.display = "all",
title = "Distribution of plant height across mesh mesh size")
## for reproducibility
set.seed(123)
ggbetweenstats(data = pea0,
x= mesh,
y = height) %>%
extract_stats()
## $subtitle_data
## # A tibble: 1 × 14
## statistic df df.er…¹ p.value method effec…² estim…³ conf.…⁴ conf.…⁵ conf.…⁶
## <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 2.85 2 8.75 0.111 One-w… Omega2 0.239 0.95 0 1
## # … with 4 more variables: conf.method <chr>, conf.distribution <chr>,
## # n.obs <int>, expression <list>, and abbreviated variable names ¹df.error,
## # ²effectsize, ³estimate, ⁴conf.level, ⁵conf.low, ⁶conf.high
##
## $caption_data
## # A tibble: 6 × 17
## term pd prior…¹ prior…² prior…³ bf10 method log_e…⁴ effec…⁵ estim…⁶
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl>
## 1 mu 1 cauchy 0 0.707 0.813 Bayes… -0.207 Bayesi… 0
## 2 mesh-big 0.936 cauchy 0 0.707 0.813 Bayes… -0.207 Bayesi… 0
## 3 mesh-middle 0.504 cauchy 0 0.707 0.813 Bayes… -0.207 Bayesi… 0
## 4 mesh-small 0.925 cauchy 0 0.707 0.813 Bayes… -0.207 Bayesi… 0
## 5 sig2 1 cauchy 0 0.707 0.813 Bayes… -0.207 Bayesi… 0
## 6 g_mesh 1 cauchy 0 0.707 0.813 Bayes… -0.207 Bayesi… 0
## # … with 7 more variables: std.dev <dbl>, conf.level <dbl>, conf.low <dbl>,
## # conf.high <dbl>, conf.method <chr>, n.obs <int>, expression <list>, and
## # abbreviated variable names ¹prior.distribution, ²prior.location,
## # ³prior.scale, ⁴log_e_bf10, ⁵effectsize, ⁶estimate
##
## $pairwise_comparisons_data
## # A tibble: 3 × 9
## group1 group2 statistic p.value alternative distrib…¹ p.adj…² test expression
## <chr> <chr> <dbl> <dbl> <chr> <chr> <chr> <chr> <list>
## 1 big middle 1.37 0.699 two.sided q Holm Game… <language>
## 2 big small 3.18 0.364 two.sided q Holm Game… <language>
## 3 middle small 2.12 0.699 two.sided q Holm Game… <language>
## # … with abbreviated variable names ¹distribution, ²p.adjust.method
##
## $descriptive_data
## NULL
##
## $one_sample_data
## NULL
##
## $tidy_data
## NULL
##
## $glance_data
## NULL
Welch’s ANOVA with the Games-Howell post hoc test. https://statisticsbyjim.com/anova/welchs-anova-compared-to-classic-one-way-anova/
ggplot(data = pea0, aes(x = mesh, y = n_plants, color=mesh))+
geom_boxplot()+
scale_color_brewer(palette="Dark2") +
geom_jitter(shape=16, position=position_jitter(0.2))
## for reproducibility
set.seed(123)
ggbetweenstats( data = pea0,
x= mesh,
y = n_plants,
pairwise.display = "all",
title = "Distribution of number of plants across mesh mesh size")
## for reproducibility
set.seed(123)
ggbetweenstats(data = pea0,
x= mesh,
y = n_plants) %>%
extract_stats()
## $subtitle_data
## # A tibble: 1 × 14
## statistic df df.er…¹ p.value method effec…² estim…³ conf.…⁴ conf.…⁵ conf.…⁶
## <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 7.06 2 8.47 0.0157 One-w… Omega2 0.514 0.95 0.00659 1
## # … with 4 more variables: conf.method <chr>, conf.distribution <chr>,
## # n.obs <int>, expression <list>, and abbreviated variable names ¹df.error,
## # ²effectsize, ³estimate, ⁴conf.level, ⁵conf.low, ⁶conf.high
##
## $caption_data
## # A tibble: 6 × 17
## term pd prior…¹ prior…² prior…³ bf10 method log_e…⁴ effec…⁵ estim…⁶
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl>
## 1 mu 1 cauchy 0 0.707 1.80 Bayes… 0.587 Bayesi… 0.111
## 2 mesh-big 0.924 cauchy 0 0.707 1.80 Bayes… 0.587 Bayesi… 0.111
## 3 mesh-middle 0.819 cauchy 0 0.707 1.80 Bayes… 0.587 Bayesi… 0.111
## 4 mesh-small 0.981 cauchy 0 0.707 1.80 Bayes… 0.587 Bayesi… 0.111
## 5 sig2 1 cauchy 0 0.707 1.80 Bayes… 0.587 Bayesi… 0.111
## 6 g_mesh 1 cauchy 0 0.707 1.80 Bayes… 0.587 Bayesi… 0.111
## # … with 7 more variables: std.dev <dbl>, conf.level <dbl>, conf.low <dbl>,
## # conf.high <dbl>, conf.method <chr>, n.obs <int>, expression <list>, and
## # abbreviated variable names ¹prior.distribution, ²prior.location,
## # ³prior.scale, ⁴log_e_bf10, ⁵effectsize, ⁶estimate
##
## $pairwise_comparisons_data
## # A tibble: 3 × 9
## group1 group2 statistic p.value alternative distrib…¹ p.adj…² test expression
## <chr> <chr> <dbl> <dbl> <chr> <chr> <chr> <chr> <list>
## 1 big middle 0.338 0.969 two.sided q Holm Game… <language>
## 2 big small 4.03 0.132 two.sided q Holm Game… <language>
## 3 middle small 4.46 0.132 two.sided q Holm Game… <language>
## # … with abbreviated variable names ¹distribution, ²p.adjust.method
##
## $descriptive_data
## NULL
##
## $one_sample_data
## NULL
##
## $tidy_data
## NULL
##
## $glance_data
## NULL
Welch’s ANOVA with the Games-Howell post hoc test. https://statisticsbyjim.com/anova/welchs-anova-compared-to-classic-one-way-anova/
ggplot(data = pea0, aes(x = mesh, y = pods, color=mesh))+
geom_boxplot()+
scale_color_brewer(palette="Dark2") +
geom_jitter(shape=16, position=position_jitter(0.2))
## for reproducibility
set.seed(123)
ggbetweenstats( data = pea0,
x= mesh,
y = pods,
pairwise.display = "s",
title = "Distribution of number of pods across mesh mesh size")
## for reproducibility
set.seed(123)
ggbetweenstats(data = pea0,
x= mesh,
y = pods) %>%
extract_stats()
## $subtitle_data
## # A tibble: 1 × 14
## statistic df df.er…¹ p.value method effec…² estim…³ conf.…⁴ conf.…⁵ conf.…⁶
## <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 5.25 2 8.21 0.0339 One-w… Omega2 0.431 0.95 0 1
## # … with 4 more variables: conf.method <chr>, conf.distribution <chr>,
## # n.obs <int>, expression <list>, and abbreviated variable names ¹df.error,
## # ²effectsize, ³estimate, ⁴conf.level, ⁵conf.low, ⁶conf.high
##
## $caption_data
## # A tibble: 6 × 17
## term pd prior…¹ prior…² prior…³ bf10 method log_e…⁴ effec…⁵ estim…⁶
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl>
## 1 mu 1 cauchy 0 0.707 0.964 Bayes… -0.0366 Bayesi… 0
## 2 mesh-big 0.873 cauchy 0 0.707 0.964 Bayes… -0.0366 Bayesi… 0
## 3 mesh-middle 0.788 cauchy 0 0.707 0.964 Bayes… -0.0366 Bayesi… 0
## 4 mesh-small 0.962 cauchy 0 0.707 0.964 Bayes… -0.0366 Bayesi… 0
## 5 sig2 1 cauchy 0 0.707 0.964 Bayes… -0.0366 Bayesi… 0
## 6 g_mesh 1 cauchy 0 0.707 0.964 Bayes… -0.0366 Bayesi… 0
## # … with 7 more variables: std.dev <dbl>, conf.level <dbl>, conf.low <dbl>,
## # conf.high <dbl>, conf.method <chr>, n.obs <int>, expression <list>, and
## # abbreviated variable names ¹prior.distribution, ²prior.location,
## # ³prior.scale, ⁴log_e_bf10, ⁵effectsize, ⁶estimate
##
## $pairwise_comparisons_data
## # A tibble: 3 × 9
## group1 group2 statistic p.value alternative distrib…¹ p.adj…² test expression
## <chr> <chr> <dbl> <dbl> <chr> <chr> <chr> <chr> <list>
## 1 big middle 0.176 0.992 two.sided q Holm Game… <language>
## 2 big small 3.30 0.227 two.sided q Holm Game… <language>
## 3 middle small 3.90 0.227 two.sided q Holm Game… <language>
## # … with abbreviated variable names ¹distribution, ²p.adjust.method
##
## $descriptive_data
## NULL
##
## $one_sample_data
## NULL
##
## $tidy_data
## NULL
##
## $glance_data
## NULL
ggplot(data = pea0, aes(x = mesh, y = pod_mass, color=mesh))+
geom_boxplot()+
scale_color_brewer(palette="Dark2") +
geom_jitter(shape=16, position=position_jitter(0.2))
## for reproducibility
set.seed(123)
ggbetweenstats( data = pea0, x= mesh, y = pod_mass, title = "Distribution of pod mass across mesh mesh size")
## for reproducibility
set.seed(123)
ggbetweenstats(data = pea0,
x= mesh,
y = pod_mass) %>%
extract_stats()
## $subtitle_data
## # A tibble: 1 × 14
## statistic df df.er…¹ p.value method effec…² estim…³ conf.…⁴ conf.…⁵ conf.…⁶
## <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 5.45 2 8.76 0.0291 One-w… Omega2 0.431 0.95 0 1
## # … with 4 more variables: conf.method <chr>, conf.distribution <chr>,
## # n.obs <int>, expression <list>, and abbreviated variable names ¹df.error,
## # ²effectsize, ³estimate, ⁴conf.level, ⁵conf.low, ⁶conf.high
##
## $caption_data
## # A tibble: 6 × 17
## term pd prior…¹ prior…² prior…³ bf10 method log_e…⁴ effec…⁵ estim…⁶
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl>
## 1 mu 1 cauchy 0 0.707 1.38 Bayes… 0.321 Bayesi… 0.0581
## 2 mesh-big 0.922 cauchy 0 0.707 1.38 Bayes… 0.321 Bayesi… 0.0581
## 3 mesh-middle 0.772 cauchy 0 0.707 1.38 Bayes… 0.321 Bayesi… 0.0581
## 4 mesh-small 0.974 cauchy 0 0.707 1.38 Bayes… 0.321 Bayesi… 0.0581
## 5 sig2 1 cauchy 0 0.707 1.38 Bayes… 0.321 Bayesi… 0.0581
## 6 g_mesh 1 cauchy 0 0.707 1.38 Bayes… 0.321 Bayesi… 0.0581
## # … with 7 more variables: std.dev <dbl>, conf.level <dbl>, conf.low <dbl>,
## # conf.high <dbl>, conf.method <chr>, n.obs <int>, expression <list>, and
## # abbreviated variable names ¹prior.distribution, ²prior.location,
## # ³prior.scale, ⁴log_e_bf10, ⁵effectsize, ⁶estimate
##
## $pairwise_comparisons_data
## # A tibble: 3 × 9
## group1 group2 statistic p.value alternative distrib…¹ p.adj…² test expression
## <chr> <chr> <dbl> <dbl> <chr> <chr> <chr> <chr> <list>
## 1 big middle 0.487 0.937 two.sided q Holm Game… <language>
## 2 big small 3.74 0.197 two.sided q Holm Game… <language>
## 3 middle small 3.84 0.197 two.sided q Holm Game… <language>
## # … with abbreviated variable names ¹distribution, ²p.adjust.method
##
## $descriptive_data
## NULL
##
## $one_sample_data
## NULL
##
## $tidy_data
## NULL
##
## $glance_data
## NULL
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = pea0,
x= hole_d,
y = height,
grouping.var = mesh,
xlab = "Distance to nearest hole (m)",
ylab= "Plant height",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Plant height relative to distance to nearest hole, grouped by mesh size"
),
plotgrid.args = list(nrow = 3, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = pea0,
x= hole_d,
y = n_plants,
grouping.var = mesh,
xlab = "Distance to nearest hole (m)",
ylab= "Number of plants",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Number of plants relative to distance to nearest hole, grouped by mesh size"
),
plotgrid.args = list(nrow = 3, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = pea0,
x= hole_d,
y = pods,
grouping.var = mesh,
xlab = "Distance to nearest hole (m)",
ylab= "Number of pods",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Number of pods relative to distance to nearest hole, grouped by mesh size"
),
plotgrid.args = list(nrow = 3, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = pea0,
x= hole_d,
y = pod_mass,
grouping.var = mesh,
xlab = "Distance to nearest hole (m)",
ylab= "Pod mass (g)",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Pod mass relative to distance to nearest hole, grouped by mesh size"
),
plotgrid.args = list(nrow = 3, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
alf_data <- rio::import(here("data", "cage_data.xlsx"), which = 2) %>%
janitor::clean_names() %>%
dplyr::mutate(mesh = fct_relevel(fence, "no")) %>%
dplyr::filter(!cage %in% c("V01b")) %>%
dplyr::mutate(pos = factor(dense_rank(pos)),
pos2 = as.numeric(pos2)) %>%
dplyr::rename("slope" = "pos2")
summary(alf_data)
## cage fence hole_d pos slope
## Length:40 Length:40 Min. :0.000 1:13 Min. : 1.0
## Class :character Class :character 1st Qu.:0.075 2:13 1st Qu.: 3.0
## Mode :character Mode :character Median :0.700 3:14 Median : 5.5
## Mean :1.100 Mean : 5.5
## 3rd Qu.:1.300 3rd Qu.: 8.0
## Max. :6.500 Max. :10.0
## kernel height other_plants mass
## Min. : 9457 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.:13275 1st Qu.: 30.00 1st Qu.: 0.00 1st Qu.: 61.50
## Median :22732 Median : 75.00 Median : 7.50 Median :116.00
## Mean :22221 Mean : 64.25 Mean : 28.62 Mean : 98.92
## 3rd Qu.:27919 3rd Qu.:100.00 3rd Qu.: 62.50 3rd Qu.:144.50
## Max. :45650 Max. :100.00 Max. :100.00 Max. :182.00
## dry_mass mesh
## Min. : 0.0 no :10
## 1st Qu.:25.5 big :10
## Median :46.0 middle:10
## Mean :44.6 small :10
## 3rd Qu.:66.5
## Max. :98.0
# Function to summarise
baseline_table <- function(data, variables, grouping_var) {
data %>%
group_by(!!!syms(grouping_var)) %>%
summarise(
across(
all_of(variables),
~ paste0(mean(.) %>% round(2), "(±", sd(.) %>% round(2), ")")
)
) %>% unite(
"grouping",
all_of(grouping_var)
) %>% pivot_longer(
cols = -"grouping",
names_to = "variables"
) %>% pivot_wider(
names_from = "grouping"
)
}
baseline_table(
alf_data,
variables = c("height", "other_plants", "mass", "dry_mass"),
grouping_var = "mesh"
) %>%
kbl(caption = "Means and SD for each mesh size") %>%
kable_styling(bootstrap_options = c("striped", "hover"))
| variables | no | big | middle | small |
|---|---|---|---|---|
| height | 46(±40.88) | 55(±34.08) | 70(±31.62) | 86(±21.19) |
| other_plants | 48.5(±48.42) | 28(±34.25) | 26(±34.71) | 12(±23) |
| mass | 69(±59.65) | 88.5(±47.29) | 103.2(±54.77) | 135(±39.71) |
| dry_mass | 29(±26.08) | 39.2(±23.38) | 47(±24.23) | 63.2(±20.25) |
Welch’s ANOVA with the Games-Howell post hoc test. https://statisticsbyjim.com/anova/welchs-anova-compared-to-classic-one-way-anova/
The ANOVA was significant but there were no significant pairwise contrasts (!). The Games-Howell post-hoc test involves a correction for multiple comparisons and makes the test more conservative.
## for reproducibility
set.seed(123)
ggplot(data = alf_data, aes(x = mesh, y = height, color=mesh)) +
geom_boxplot()+
scale_color_brewer(palette="Dark2") +
geom_jitter(shape=16, position=position_jitter(0.2))
ggbetweenstats(data = alf_data,
x= mesh,
y = height,
pairwise.display = "s",
title = "Distribution of plant height across mesh mesh size")
ggbetweenstats(data = alf_data,
x= mesh,
y = height) %>%
extract_stats()
## $subtitle_data
## # A tibble: 1 × 14
## statistic df df.er…¹ p.value method effec…² estim…³ conf.…⁴ conf.…⁵ conf.…⁶
## <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 3.41 3 19.4 0.0381 One-w… Omega2 0.236 0.95 0 1
## # … with 4 more variables: conf.method <chr>, conf.distribution <chr>,
## # n.obs <int>, expression <list>, and abbreviated variable names ¹df.error,
## # ²effectsize, ³estimate, ⁴conf.level, ⁵conf.low, ⁶conf.high
##
## $caption_data
## # A tibble: 7 × 17
## term pd prior…¹ prior…² prior…³ bf10 method log_e…⁴ effec…⁵ estim…⁶
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl>
## 1 mu 1 cauchy 0 0.707 1.27 Bayes… 0.236 Bayesi… 0.0520
## 2 mesh-no 0.956 cauchy 0 0.707 1.27 Bayes… 0.236 Bayesi… 0.0520
## 3 mesh-big 0.814 cauchy 0 0.707 1.27 Bayes… 0.236 Bayesi… 0.0520
## 4 mesh-middle 0.708 cauchy 0 0.707 1.27 Bayes… 0.236 Bayesi… 0.0520
## 5 mesh-small 0.976 cauchy 0 0.707 1.27 Bayes… 0.236 Bayesi… 0.0520
## 6 sig2 1 cauchy 0 0.707 1.27 Bayes… 0.236 Bayesi… 0.0520
## 7 g_mesh 1 cauchy 0 0.707 1.27 Bayes… 0.236 Bayesi… 0.0520
## # … with 7 more variables: std.dev <dbl>, conf.level <dbl>, conf.low <dbl>,
## # conf.high <dbl>, conf.method <chr>, n.obs <int>, expression <list>, and
## # abbreviated variable names ¹prior.distribution, ²prior.location,
## # ³prior.scale, ⁴log_e_bf10, ⁵effectsize, ⁶estimate
##
## $pairwise_comparisons_data
## # A tibble: 6 × 9
## group1 group2 statistic p.value alternative distrib…¹ p.adj…² test expression
## <chr> <chr> <dbl> <dbl> <chr> <chr> <chr> <chr> <list>
## 1 big middle 1.44 1 two.sided q Holm Game… <language>
## 2 big small 3.46 0.555 two.sided q Holm Game… <language>
## 3 middle small 1.88 1 two.sided q Holm Game… <language>
## 4 no big 0.756 1 two.sided q Holm Game… <language>
## 5 no middle 2.08 1 two.sided q Holm Game… <language>
## 6 no small 3.89 0.409 two.sided q Holm Game… <language>
## # … with abbreviated variable names ¹distribution, ²p.adjust.method
##
## $descriptive_data
## NULL
##
## $one_sample_data
## NULL
##
## $tidy_data
## NULL
##
## $glance_data
## NULL
pair_alf <- alf_data %>%
dplyr::filter(mesh %in% c("no", "small")) %>%
dplyr::filter(!cage == "V01")
ggplot(data = pair_alf, aes(x = mesh, y = height, color=mesh)) +
geom_boxplot()+
scale_color_brewer(palette="Dark2") +
geom_jitter(shape=16, position=position_jitter(0.2))
## for reproducibility
set.seed(123)
ggwithinstats(data = pair_alf,
x= mesh,
y = height,
title = "Effect of small mesh size on plant height")
ggwithinstats(data = pair_alf,
x= mesh,
y = height) %>%
extract_stats()
## $subtitle_data
## # A tibble: 1 × 16
## term group statistic df.error p.value method alter…¹ effec…² estim…³ conf.…⁴
## <chr> <chr> <dbl> <dbl> <dbl> <chr> <chr> <chr> <dbl> <dbl>
## 1 height mesh -4.41 9 0.00169 Paire… two.si… Hedges… -1.27 0.95
## # … with 6 more variables: conf.low <dbl>, conf.high <dbl>, conf.method <chr>,
## # conf.distribution <chr>, n.obs <int>, expression <list>, and abbreviated
## # variable names ¹alternative, ²effectsize, ³estimate, ⁴conf.level
##
## $caption_data
## # A tibble: 1 × 16
## term effec…¹ estim…² conf.…³ conf.…⁴ conf.…⁵ pd prior…⁶ prior…⁷ prior…⁸
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 Differe… Bayesi… -36.1 0.95 -57.2 -14.0 0.998 cauchy 0 0.707
## # … with 6 more variables: bf10 <dbl>, method <chr>, conf.method <chr>,
## # log_e_bf10 <dbl>, n.obs <int>, expression <list>, and abbreviated variable
## # names ¹effectsize, ²estimate, ³conf.level, ⁴conf.low, ⁵conf.high,
## # ⁶prior.distribution, ⁷prior.location, ⁸prior.scale
##
## $pairwise_comparisons_data
## NULL
##
## $descriptive_data
## NULL
##
## $one_sample_data
## NULL
##
## $tidy_data
## NULL
##
## $glance_data
## NULL
A paired t-test revealed that, across 10 cages, there was a statistically significant difference in plant height between cages protected with a small mesh and those unprotected . The effect size (Hedges’ g p = -1.27) was very large, as per Sawilowsky (2009) conventions. The Bayes Factor for the same analysis revealed that the data were 26 times more probable under the alternative hypothesis as compared to the null hypothesis. This can be considered as strong evidence (Jeffreys, 1961) in favor of the alternative hypothesis.
Welch’s ANOVA with the Games-Howell post hoc test. https://statisticsbyjim.com/anova/welchs-anova-compared-to-classic-one-way-anova/
ggplot(data = alf_data, aes(x = mesh, y = other_plants, color=mesh))+
geom_boxplot()+
scale_color_brewer(palette="Dark2") +
geom_jitter(shape=16, position=position_jitter(0.2))
## for reproducibility
set.seed(123)
ggbetweenstats( data = alf_data,
x= mesh,
y = other_plants,
pairwise.display = "s",
title = " other plants across mesh mesh size")
## for reproducibility
set.seed(123)
ggbetweenstats(data = alf_data,
x= mesh,
y = other_plants) %>%
extract_stats()
## $subtitle_data
## # A tibble: 1 × 14
## statistic df df.er…¹ p.value method effec…² estim…³ conf.…⁴ conf.…⁵ conf.…⁶
## <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1.66 3 19.4 0.209 One-w… Omega2 0.0781 0.95 0 1
## # … with 4 more variables: conf.method <chr>, conf.distribution <chr>,
## # n.obs <int>, expression <list>, and abbreviated variable names ¹df.error,
## # ²effectsize, ³estimate, ⁴conf.level, ⁵conf.low, ⁶conf.high
##
## $caption_data
## # A tibble: 7 × 17
## term pd prior…¹ prior…² prior…³ bf10 method log_e…⁴ effec…⁵ estim…⁶
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl>
## 1 mu 1 cauchy 0 0.707 0.411 Bayes… -0.889 Bayesi… 0
## 2 mesh-no 0.955 cauchy 0 0.707 0.411 Bayes… -0.889 Bayesi… 0
## 3 mesh-big 0.526 cauchy 0 0.707 0.411 Bayes… -0.889 Bayesi… 0
## 4 mesh-middle 0.596 cauchy 0 0.707 0.411 Bayes… -0.889 Bayesi… 0
## 5 mesh-small 0.915 cauchy 0 0.707 0.411 Bayes… -0.889 Bayesi… 0
## 6 sig2 1 cauchy 0 0.707 0.411 Bayes… -0.889 Bayesi… 0
## 7 g_mesh 1 cauchy 0 0.707 0.411 Bayes… -0.889 Bayesi… 0
## # … with 7 more variables: std.dev <dbl>, conf.level <dbl>, conf.low <dbl>,
## # conf.high <dbl>, conf.method <chr>, n.obs <int>, expression <list>, and
## # abbreviated variable names ¹prior.distribution, ²prior.location,
## # ³prior.scale, ⁴log_e_bf10, ⁵effectsize, ⁶estimate
##
## $pairwise_comparisons_data
## # A tibble: 6 × 9
## group1 group2 statistic p.value alternative distrib…¹ p.adj…² test expression
## <chr> <chr> <dbl> <dbl> <chr> <chr> <chr> <chr> <list>
## 1 big middle -0.183 1 two.sided q Holm Game… <language>
## 2 big small -1.73 1 two.sided q Holm Game… <language>
## 3 middle small -1.50 1 two.sided q Holm Game… <language>
## 4 no big -1.55 1 two.sided q Holm Game… <language>
## 5 no middle -1.69 1 two.sided q Holm Game… <language>
## 6 no small -3.05 1 two.sided q Holm Game… <language>
## # … with abbreviated variable names ¹distribution, ²p.adjust.method
##
## $descriptive_data
## NULL
##
## $one_sample_data
## NULL
##
## $tidy_data
## NULL
##
## $glance_data
## NULL
ggplot(data = pair_alf, aes(x = mesh, y = other_plants, color=mesh)) +
geom_boxplot()+
scale_color_brewer(palette="Dark2") +
geom_jitter(shape=16, position=position_jitter(0.2))
## for reproducibility
set.seed(123)
ggwithinstats(data = pair_alf,
x= mesh,
y = other_plants,
title = "Effect of small mesh size on presence of other plants")
## for reproducibility
set.seed(123)
ggwithinstats(data = pair_alf,
x= mesh,
y = other_plants) %>%
extract_stats()
## $subtitle_data
## # A tibble: 1 × 16
## term group stati…¹ df.er…² p.value method alter…³ effec…⁴ estim…⁵ conf.…⁶
## <chr> <chr> <dbl> <dbl> <dbl> <chr> <chr> <chr> <dbl> <dbl>
## 1 other_pl… mesh 2.86 9 0.0189 Paire… two.si… Hedges… 0.825 0.95
## # … with 6 more variables: conf.low <dbl>, conf.high <dbl>, conf.method <chr>,
## # conf.distribution <chr>, n.obs <int>, expression <list>, and abbreviated
## # variable names ¹statistic, ²df.error, ³alternative, ⁴effectsize, ⁵estimate,
## # ⁶conf.level
##
## $caption_data
## # A tibble: 1 × 16
## term effec…¹ estim…² conf.…³ conf.…⁴ conf.…⁵ pd prior…⁶ prior…⁷ prior…⁸
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 Differe… Bayesi… 30.9 0.95 4.08 57.4 0.985 cauchy 0 0.707
## # … with 6 more variables: bf10 <dbl>, method <chr>, conf.method <chr>,
## # log_e_bf10 <dbl>, n.obs <int>, expression <list>, and abbreviated variable
## # names ¹effectsize, ²estimate, ³conf.level, ⁴conf.low, ⁵conf.high,
## # ⁶prior.distribution, ⁷prior.location, ⁸prior.scale
##
## $pairwise_comparisons_data
## NULL
##
## $descriptive_data
## NULL
##
## $one_sample_data
## NULL
##
## $tidy_data
## NULL
##
## $glance_data
## NULL
The relationship between mass and dry mass looks suspicious for two cages: v21 and v60b
mass_outliers <- alf_data %>%
dplyr::mutate(change_mass = mass - dry_mass)
ggplot(mass_outliers, mapping = aes(x = mesh, y = change_mass)) +
geom_boxplot(outlier.colour = "red", outlier.shape = 5, outlier.size = 4)
… but the boxplot doesn’t detect them as outliers, so we include them in the analysis.
Welch’s ANOVA with the Games-Howell post hoc test. https://statisticsbyjim.com/anova/welchs-anova-compared-to-classic-one-way-anova/
ggplot(data = alf_data, aes(x = mesh, y = mass, color=mesh))+
geom_boxplot()+
scale_color_brewer(palette="Dark2") +
geom_jitter(shape=16, position=position_jitter(0.2))
## for reproducibility
set.seed(123)
ggbetweenstats( data = alf_data,
x= mesh,
y = mass,
pairwise.display = "s",
title = "Mass across mesh mesh size")
## for reproducibility
set.seed(123)
ggbetweenstats(data = alf_data,
x= mesh,
y = mass) %>%
extract_stats()
## $subtitle_data
## # A tibble: 1 × 14
## statistic df df.er…¹ p.value method effec…² estim…³ conf.…⁴ conf.…⁵ conf.…⁶
## <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 3.30 3 19.8 0.0417 One-w… Omega2 0.225 0.95 0 1
## # … with 4 more variables: conf.method <chr>, conf.distribution <chr>,
## # n.obs <int>, expression <list>, and abbreviated variable names ¹df.error,
## # ²effectsize, ³estimate, ⁴conf.level, ⁵conf.low, ⁶conf.high
##
## $caption_data
## # A tibble: 7 × 17
## term pd prior…¹ prior…² prior…³ bf10 method log_e…⁴ effec…⁵ estim…⁶
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl>
## 1 mu 1 cauchy 0 0.707 1.40 Bayes… 0.337 Bayesi… 0.0619
## 2 mesh-no 0.963 cauchy 0 0.707 1.40 Bayes… 0.337 Bayesi… 0.0619
## 3 mesh-big 0.746 cauchy 0 0.707 1.40 Bayes… 0.337 Bayesi… 0.0619
## 4 mesh-middle 0.602 cauchy 0 0.707 1.40 Bayes… 0.337 Bayesi… 0.0619
## 5 mesh-small 0.983 cauchy 0 0.707 1.40 Bayes… 0.337 Bayesi… 0.0619
## 6 sig2 1 cauchy 0 0.707 1.40 Bayes… 0.337 Bayesi… 0.0619
## 7 g_mesh 1 cauchy 0 0.707 1.40 Bayes… 0.337 Bayesi… 0.0619
## # … with 7 more variables: std.dev <dbl>, conf.level <dbl>, conf.low <dbl>,
## # conf.high <dbl>, conf.method <chr>, n.obs <int>, expression <list>, and
## # abbreviated variable names ¹prior.distribution, ²prior.location,
## # ³prior.scale, ⁴log_e_bf10, ⁵effectsize, ⁶estimate
##
## $pairwise_comparisons_data
## # A tibble: 6 × 9
## group1 group2 statistic p.value alternative distrib…¹ p.adj…² test expression
## <chr> <chr> <dbl> <dbl> <chr> <chr> <chr> <chr> <list>
## 1 big middle 0.908 1 two.sided q Holm Game… <language>
## 2 big small 3.37 0.589 two.sided q Holm Game… <language>
## 3 middle small 2.10 1 two.sided q Holm Game… <language>
## 4 no big 1.15 1 two.sided q Holm Game… <language>
## 5 no middle 1.89 1 two.sided q Holm Game… <language>
## 6 no small 4.12 0.275 two.sided q Holm Game… <language>
## # … with abbreviated variable names ¹distribution, ²p.adjust.method
##
## $descriptive_data
## NULL
##
## $one_sample_data
## NULL
##
## $tidy_data
## NULL
##
## $glance_data
## NULL
ggplot(data = pair_alf, aes(x = mesh, y = mass, color=mesh)) +
geom_boxplot()+
scale_color_brewer(palette="Dark2") +
geom_jitter(shape=16, position=position_jitter(0.2))
## for reproducibility
set.seed(123)
ggwithinstats(data = pair_alf,
x= mesh,
y = mass,
title = "Effect of small mesh size on mass")
## for reproducibility
set.seed(123)
ggwithinstats(data = pair_alf,
x= mesh,
y = mass) %>%
extract_stats()
## $subtitle_data
## # A tibble: 1 × 16
## term group statistic df.error p.value method alter…¹ effec…² estim…³ conf.…⁴
## <chr> <chr> <dbl> <dbl> <dbl> <chr> <chr> <chr> <dbl> <dbl>
## 1 mass mesh -5.42 9 0.000421 Paire… two.si… Hedges… -1.57 0.95
## # … with 6 more variables: conf.low <dbl>, conf.high <dbl>, conf.method <chr>,
## # conf.distribution <chr>, n.obs <int>, expression <list>, and abbreviated
## # variable names ¹alternative, ²effectsize, ³estimate, ⁴conf.level
##
## $caption_data
## # A tibble: 1 × 16
## term effec…¹ estim…² conf.…³ conf.…⁴ conf.…⁵ pd prior…⁶ prior…⁷ prior…⁸
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 Differe… Bayesi… -61.9 0.95 -89.3 -31.1 1.00 cauchy 0 0.707
## # … with 6 more variables: bf10 <dbl>, method <chr>, conf.method <chr>,
## # log_e_bf10 <dbl>, n.obs <int>, expression <list>, and abbreviated variable
## # names ¹effectsize, ²estimate, ³conf.level, ⁴conf.low, ⁵conf.high,
## # ⁶prior.distribution, ⁷prior.location, ⁸prior.scale
##
## $pairwise_comparisons_data
## NULL
##
## $descriptive_data
## NULL
##
## $one_sample_data
## NULL
##
## $tidy_data
## NULL
##
## $glance_data
## NULL
Welch’s ANOVA with the Games-Howell post hoc test. https://statisticsbyjim.com/anova/welchs-anova-compared-to-classic-one-way-anova/
ggplot(data = alf_data, aes(x = mesh, y = dry_mass, color=mesh))+
geom_boxplot()+
scale_color_brewer(palette="Dark2") +
geom_jitter(shape=16, position=position_jitter(0.2))
## for reproducibility
set.seed(123)
ggbetweenstats( data = alf_data, x= mesh, y = dry_mass, title = "Dry mass across mesh mesh size")
## for reproducibility
set.seed(123)
ggbetweenstats(data = alf_data,
x= mesh,
y = dry_mass) %>%
extract_stats()
## $subtitle_data
## # A tibble: 1 × 14
## statistic df df.er…¹ p.value method effec…² estim…³ conf.…⁴ conf.…⁵ conf.…⁶
## <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 3.84 3 19.9 0.0256 One-w… Omega2 0.262 0.95 0 1
## # … with 4 more variables: conf.method <chr>, conf.distribution <chr>,
## # n.obs <int>, expression <list>, and abbreviated variable names ¹df.error,
## # ²effectsize, ³estimate, ⁴conf.level, ⁵conf.low, ⁶conf.high
##
## $caption_data
## # A tibble: 7 × 17
## term pd prior…¹ prior…² prior…³ bf10 method log_e…⁴ effec…⁵ estim…⁶
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl>
## 1 mu 1 cauchy 0 0.707 2.82 Bayes… 1.04 Bayesi… 0.135
## 2 mesh-no 0.977 cauchy 0 0.707 2.82 Bayes… 1.04 Bayesi… 0.135
## 3 mesh-big 0.774 cauchy 0 0.707 2.82 Bayes… 1.04 Bayesi… 0.135
## 4 mesh-middle 0.626 cauchy 0 0.707 2.82 Bayes… 1.04 Bayesi… 0.135
## 5 mesh-small 0.991 cauchy 0 0.707 2.82 Bayes… 1.04 Bayesi… 0.135
## 6 sig2 1 cauchy 0 0.707 2.82 Bayes… 1.04 Bayesi… 0.135
## 7 g_mesh 1 cauchy 0 0.707 2.82 Bayes… 1.04 Bayesi… 0.135
## # … with 7 more variables: std.dev <dbl>, conf.level <dbl>, conf.low <dbl>,
## # conf.high <dbl>, conf.method <chr>, n.obs <int>, expression <list>, and
## # abbreviated variable names ¹prior.distribution, ²prior.location,
## # ³prior.scale, ⁴log_e_bf10, ⁵effectsize, ⁶estimate
##
## $pairwise_comparisons_data
## # A tibble: 6 × 9
## group1 group2 statistic p.value alternative distrib…¹ p.adj…² test expression
## <chr> <chr> <dbl> <dbl> <chr> <chr> <chr> <chr> <list>
## 1 big middle 1.04 1 two.sided q Holm Game… <language>
## 2 big small 3.47 0.515 two.sided q Holm Game… <language>
## 3 middle small 2.29 1 two.sided q Holm Game… <language>
## 4 no big 1.30 1 two.sided q Holm Game… <language>
## 5 no middle 2.26 1 two.sided q Holm Game… <language>
## 6 no small 4.63 0.126 two.sided q Holm Game… <language>
## # … with abbreviated variable names ¹distribution, ²p.adjust.method
##
## $descriptive_data
## NULL
##
## $one_sample_data
## NULL
##
## $tidy_data
## NULL
##
## $glance_data
## NULL
ggplot(data = pair_alf, aes(x = mesh, y = dry_mass, color=mesh)) +
geom_boxplot()+
scale_color_brewer(palette="Dark2") +
geom_jitter(shape=16, position=position_jitter(0.2))
## for reproducibility
set.seed(123)
ggwithinstats(data = pair_alf,
x= mesh,
y = dry_mass,
title = "Effect of small mesh size on dry mass")
## for reproducibility
set.seed(123)
ggwithinstats(data = pair_alf,
x= mesh,
y = dry_mass) %>%
extract_stats()
## $subtitle_data
## # A tibble: 1 × 16
## term group statis…¹ df.er…² p.value method alter…³ effec…⁴ estim…⁵ conf.…⁶
## <chr> <chr> <dbl> <dbl> <dbl> <chr> <chr> <chr> <dbl> <dbl>
## 1 dry_mass mesh -4.90 9 8.50e-4 Paire… two.si… Hedges… -1.42 0.95
## # … with 6 more variables: conf.low <dbl>, conf.high <dbl>, conf.method <chr>,
## # conf.distribution <chr>, n.obs <int>, expression <list>, and abbreviated
## # variable names ¹statistic, ²df.error, ³alternative, ⁴effectsize, ⁵estimate,
## # ⁶conf.level
##
## $caption_data
## # A tibble: 1 × 16
## term effec…¹ estim…² conf.…³ conf.…⁴ conf.…⁵ pd prior…⁶ prior…⁷ prior…⁸
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 Differe… Bayesi… -31.7 0.95 -47.3 -14.3 0.998 cauchy 0 0.707
## # … with 6 more variables: bf10 <dbl>, method <chr>, conf.method <chr>,
## # log_e_bf10 <dbl>, n.obs <int>, expression <list>, and abbreviated variable
## # names ¹effectsize, ²estimate, ³conf.level, ⁴conf.low, ⁵conf.high,
## # ⁶prior.distribution, ⁷prior.location, ⁸prior.scale
##
## $pairwise_comparisons_data
## NULL
##
## $descriptive_data
## NULL
##
## $one_sample_data
## NULL
##
## $tidy_data
## NULL
##
## $glance_data
## NULL
How to interpret diagnostic plots: https://data.library.virginia.edu/diagnostic-plots/
# Test for normality and homoscedasticity of residuals
par(mfrow=c(2,2))
plot(lm(height ~ pos + slope + kernel + mesh, data = alf_data))
plot(lm(mass ~ pos + slope + kernel + mesh, data = alf_data))
plot(lm(dry_mass ~ pos + slope + kernel + mesh, data = alf_data))
# Perform multiple linear regression
lm_height <- lm(height ~ pos + slope + kernel + mesh, data = alf_data)
summary(lm_height)
##
## Call:
## lm(formula = height ~ pos + slope + kernel + mesh, data = alf_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.455 -10.413 -2.802 6.971 43.790
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 28.288173 18.182708 1.556 0.12960
## pos2 26.820013 8.151914 3.290 0.00244 **
## pos3 38.480236 13.794926 2.789 0.00882 **
## slope 5.263648 1.804398 2.917 0.00641 **
## kernel -0.001564 0.000815 -1.920 0.06386 .
## meshbig 14.558413 8.172024 1.781 0.08433 .
## meshmiddle 23.799964 8.305001 2.866 0.00729 **
## meshsmall 40.000000 8.136258 4.916 2.53e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.19 on 32 degrees of freedom
## Multiple R-squared: 0.7783, Adjusted R-squared: 0.7298
## F-statistic: 16.05 on 7 and 32 DF, p-value: 7.563e-09
lm_mass <- lm(mass ~ pos + slope + kernel + mesh, data = alf_data)
summary(lm_mass)
##
## Call:
## lm(formula = mass ~ pos + slope + kernel + mesh, data = alf_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -40.013 -19.784 -0.941 16.087 68.172
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 54.169420 30.598953 1.770 0.08620 .
## pos2 36.519013 13.718530 2.662 0.01205 *
## pos3 15.372829 23.214929 0.662 0.51259
## slope 14.997493 3.036549 4.939 2.37e-05 ***
## kernel -0.003824 0.001371 -2.788 0.00885 **
## meshbig 25.218042 13.752372 1.834 0.07601 .
## meshmiddle 28.746281 13.976154 2.057 0.04793 *
## meshsmall 66.000000 13.692184 4.820 3.35e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 30.62 on 32 degrees of freedom
## Multiple R-squared: 0.7427, Adjusted R-squared: 0.6864
## F-statistic: 13.2 on 7 and 32 DF, p-value: 7.355e-08
lm_dry <- lm(dry_mass ~ pos + slope + kernel + mesh, data = alf_data)
summary(lm_dry)
##
## Call:
## lm(formula = dry_mass ~ pos + slope + kernel + mesh, data = alf_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -31.356 -9.749 -1.292 7.752 41.007
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 19.4498265 15.0488135 1.292 0.2055
## pos2 13.9623425 6.7468843 2.069 0.0467 *
## pos3 3.9539527 11.4172910 0.346 0.7314
## slope 7.3039186 1.4933993 4.891 2.73e-05 ***
## kernel -0.0016418 0.0006745 -2.434 0.0207 *
## meshbig 12.3903922 6.7635283 1.832 0.0763 .
## meshmiddle 15.5655277 6.8735859 2.265 0.0305 *
## meshsmall 34.2000000 6.7339272 5.079 1.58e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.06 on 32 degrees of freedom
## Multiple R-squared: 0.7237, Adjusted R-squared: 0.6632
## F-statistic: 11.97 on 7 and 32 DF, p-value: 2.17e-07
To avoid suslik effect
small<- alf_data %>%
dplyr::filter(mesh=="small")
# Test for normality and homoscedasticity of residuals
par(mfrow=c(2,2))
# non-linear relationship not explained by the data. Bad model
plot(lm(height ~ pos + slope, data = small))
# 2 is an outlier and it will affect the lm
plot(lm(mass ~ pos + slope, data = small))
# 2 is an outlier(within cooks distance) and it will affect the lm
plot(lm(dry_mass ~ pos + slope, data = small))
# Perform multiple linear regression
lm_height <- lm(height ~ pos + slope, data = small)
summary(lm_height)
##
## Call:
## lm(formula = height ~ pos + slope, data = small)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.333 -9.722 3.056 9.861 20.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 57.778 16.586 3.483 0.0131 *
## pos2 9.259 15.497 0.597 0.5720
## pos3 27.963 14.402 1.942 0.1002
## slope 2.593 2.090 1.241 0.2610
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.81 on 6 degrees of freedom
## Multiple R-squared: 0.4747, Adjusted R-squared: 0.212
## F-statistic: 1.807 on 3 and 6 DF, p-value: 0.246
lm_mass <- lm(mass ~ pos + slope, data = small)
summary(lm_mass)
##
## Call:
## lm(formula = mass ~ pos + slope, data = small)
##
## Residuals:
## Min 1Q Median 3Q Max
## -34.00 -17.25 -4.00 21.83 37.33
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 72.667 26.632 2.729 0.0343 *
## pos2 3.444 24.883 0.138 0.8944
## pos3 14.222 23.125 0.615 0.5611
## slope 10.111 3.355 3.013 0.0236 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 30.2 on 6 degrees of freedom
## Multiple R-squared: 0.6145, Adjusted R-squared: 0.4218
## F-statistic: 3.188 on 3 and 6 DF, p-value: 0.1055
lm_dry <- lm(dry_mass ~ pos + slope, data = small)
summary(lm_dry)
##
## Call:
## lm(formula = dry_mass ~ pos + slope, data = small)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.000 -10.500 -1.333 13.583 17.667
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.3333 15.9261 2.470 0.0485 *
## pos2 -0.1111 14.8806 -0.007 0.9943
## pos3 -2.8889 13.8289 -0.209 0.8414
## slope 4.5556 2.0065 2.270 0.0636 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.06 on 6 degrees of freedom
## Multiple R-squared: 0.4697, Adjusted R-squared: 0.2045
## F-statistic: 1.771 on 3 and 6 DF, p-value: 0.2523
# Test for normality and homoscedasticity of residuals
par(mfrow=c(2,2))
# Not equal variance (homoscedasticity)
plot(lm(height ~ slope, data = small))
#okayish
plot(lm(mass ~ slope, data = small))
#okayish
plot(lm(dry_mass ~ slope, data = small))
# Perform multiple linear regression
lm_height <- lm(height ~slope, data = small)
summary(lm_height)
##
## Call:
## lm(formula = height ~ slope, data = small)
##
## Residuals:
## Min 1Q Median 3Q Max
## -37.212 -7.970 4.303 14.000 24.909
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 72.667 14.401 5.046 0.000994 ***
## slope 2.424 2.321 1.045 0.326775
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21.08 on 8 degrees of freedom
## Multiple R-squared: 0.12, Adjusted R-squared: 0.01001
## F-statistic: 1.091 on 1 and 8 DF, p-value: 0.3268
lm_mass <- lm(mass ~ slope, data = small)
summary(lm_mass)
##
## Call:
## lm(formula = mass ~ slope, data = small)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.83 -21.55 -6.83 25.90 34.07
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 79.733 18.495 4.311 0.00258 **
## slope 10.048 2.981 3.371 0.00977 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 27.07 on 8 degrees of freedom
## Multiple R-squared: 0.5869, Adjusted R-squared: 0.5352
## F-statistic: 11.36 on 1 and 8 DF, p-value: 0.009768
lm_dry <- lm(dry_mass ~slope, data = small)
summary(lm_dry)
##
## Call:
## lm(formula = dry_mass ~ slope, data = small)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.71 -10.11 -0.20 11.88 18.85
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 38.133 10.736 3.552 0.00749 **
## slope 4.558 1.730 2.634 0.02999 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.72 on 8 degrees of freedom
## Multiple R-squared: 0.4645, Adjusted R-squared: 0.3975
## F-statistic: 6.938 on 1 and 8 DF, p-value: 0.02999
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = alf_data,
x= hole_d,
y = height,
grouping.var = mesh,
xlab = "Distance to nearest hole (m)",
ylab= "Plant height",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Plant height relative to distance to nearest hole, grouped by mesh size"
),
plotgrid.args = list(nrow = 4, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = alf_data,
x= hole_d,
y = other_plants,
grouping.var = mesh,
xlab = "Distance to nearest hole (m)",
ylab= "Other plants",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Other plants relative to distance to nearest hole, grouped by mesh size"
),
plotgrid.args = list(nrow = 4, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = alf_data,
x= hole_d,
y = mass,
grouping.var = mesh,
xlab = "Distance to nearest hole (m)",
ylab= "Mass",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Mass relative to distance to nearest hole, grouped by mesh size"
),
plotgrid.args = list(nrow = 4, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = alf_data,
x= hole_d,
y = dry_mass,
grouping.var = mesh,
xlab = "Distance to nearest hole (m)",
ylab= "Dry mass (g)",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Dry mass relative to distance to nearest hole, grouped by mesh size"
),
plotgrid.args = list(nrow = 4, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
No mesh, big and middle merged, to have accessible not accessible
bi_al <-alf_data %>%
dplyr::mutate(mesh = recode(mesh, big = "accessible", middle = "accessible", no = "accessible", small = "not accessible"))
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = bi_al,
x= hole_d,
y = height,
grouping.var = mesh,
xlab = "Distance to nearest hole (m)",
ylab= "Plant height",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Plant height relative to distance to nearest hole, grouped by mesh size"
),
plotgrid.args = list(nrow = 4, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
mod <- lm(height ~ hole_d + mesh, data = bi_al)
summary(mod)
##
## Call:
## lm(formula = height ~ hole_d + mesh, data = bi_al)
##
## Residuals:
## Min 1Q Median 3Q Max
## -46.167 -20.903 0.486 23.800 53.833
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 46.167 6.163 7.491 6.35e-09 ***
## hole_d 11.130 3.212 3.465 0.00136 **
## meshnot accessible 23.361 10.746 2.174 0.03619 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 29.09 on 37 degrees of freedom
## Multiple R-squared: 0.3447, Adjusted R-squared: 0.3092
## F-statistic: 9.729 on 2 and 37 DF, p-value: 0.0004024
coef(mod)
## (Intercept) hole_d meshnot accessible
## 46.16708 11.12972 23.36094
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = bi_al,
x= hole_d,
y = other_plants,
grouping.var = mesh,
xlab = "Distance to nearest hole (m)",
ylab= "Other plants",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Other plants relative to distance to nearest hole, grouped by mesh size"
),
plotgrid.args = list(nrow = 4, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
mod <- lm(other_plants ~ hole_d + mesh, data = bi_al)
summary(mod)
##
## Call:
## lm(formula = other_plants ~ hole_d + mesh, data = bi_al)
##
## Residuals:
## Min 1Q Median 3Q Max
## -43.652 -30.061 -8.958 28.991 56.348
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 43.652 7.187 6.074 4.98e-07 ***
## hole_d -9.745 3.746 -2.602 0.0133 *
## meshnot accessible -17.229 12.531 -1.375 0.1774
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 33.92 on 37 degrees of freedom
## Multiple R-squared: 0.2123, Adjusted R-squared: 0.1697
## F-statistic: 4.986 on 2 and 37 DF, p-value: 0.0121
coef(mod)
## (Intercept) hole_d meshnot accessible
## 43.652200 -9.745411 -17.228992
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = bi_al,
x= hole_d,
y = mass,
grouping.var = mesh,
xlab = "Distance to nearest hole (m)",
ylab= "Mass",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Mass relative to distance to nearest hole, grouped by mesh size"
),
plotgrid.args = list(nrow = 4, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
mod <- lm(mass ~ hole_d + mesh, data = bi_al)
summary(mod)
##
## Call:
## lm(formula = mass ~ hole_d + mesh, data = bi_al)
##
## Residuals:
## Min 1Q Median 3Q Max
## -70.741 -38.194 -9.731 44.298 73.259
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 70.741 9.641 7.337 1.01e-08 ***
## hole_d 16.602 5.025 3.304 0.00212 **
## meshnot accessible 39.688 16.811 2.361 0.02362 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 45.51 on 37 degrees of freedom
## Multiple R-squared: 0.3427, Adjusted R-squared: 0.3072
## F-statistic: 9.647 on 2 and 37 DF, p-value: 0.0004249
coef(mod)
## (Intercept) hole_d meshnot accessible
## 70.74057 16.60215 39.68824
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = bi_al,
x= hole_d,
y = dry_mass,
grouping.var = mesh,
xlab = "Distance to nearest hole (m)",
ylab= "Dry mass (g)",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Dry mass relative to distance to nearest hole, grouped by mesh size"
),
plotgrid.args = list(nrow = 4, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
mod <- lm(dry_mass ~ hole_d + mesh, data = bi_al)
summary(mod)
##
## Call:
## lm(formula = dry_mass ~ hole_d + mesh, data = bi_al)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32.497 -19.862 -4.923 18.555 40.137
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 32.497 4.754 6.836 4.69e-08 ***
## hole_d 6.064 2.478 2.448 0.0192 *
## meshnot accessible 21.727 8.289 2.621 0.0126 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.44 on 37 degrees of freedom
## Multiple R-squared: 0.2905, Adjusted R-squared: 0.2522
## F-statistic: 7.576 on 2 and 37 DF, p-value: 0.001746
coef(mod)
## (Intercept) hole_d meshnot accessible
## 32.497433 6.064281 21.727431
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = bi_al,
x= kernel,
y = height,
grouping.var = mesh,
xlab = "Density (kernel)",
ylab= "Plant height",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Plant height relative to density (kernel), grouped by mesh size"
),
plotgrid.args = list(nrow = 4, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
mod <- lm(height ~ kernel + mesh, data = bi_al)
summary(mod)
##
## Call:
## lm(formula = height ~ kernel + mesh, data = bi_al)
##
## Residuals:
## Min 1Q Median 3Q Max
## -56.570 -22.328 3.435 24.628 53.636
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 95.4261560 12.8602707 7.420 7.87e-09 ***
## kernel -0.0017279 0.0005253 -3.289 0.00221 **
## meshnot accessible 28.8732310 10.7532270 2.685 0.01079 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 29.45 on 37 degrees of freedom
## Multiple R-squared: 0.3284, Adjusted R-squared: 0.2921
## F-statistic: 9.046 on 2 and 37 DF, p-value: 0.0006332
coef(mod)
## (Intercept) kernel meshnot accessible
## 95.426155984 -0.001727883 28.873230996
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = bi_al,
x= kernel,
y = other_plants,
grouping.var = mesh,
xlab = "Density (kernel)",
ylab= "Other plants",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Other plants relative to density (kernel), grouped by mesh size"
),
plotgrid.args = list(nrow = 4, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
mod <- lm(other_plants ~ kernel + mesh, data = bi_al)
summary(mod)
##
## Call:
## lm(formula = other_plants ~ kernel + mesh, data = bi_al)
##
## Residuals:
## Min 1Q Median 3Q Max
## -46.25 -25.93 -14.25 33.12 65.58
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.166e+01 1.559e+01 0.748 0.4592
## kernel 1.012e-03 6.368e-04 1.589 0.1205
## meshnot accessible -2.209e+01 1.303e+01 -1.695 0.0985 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 35.7 on 37 degrees of freedom
## Multiple R-squared: 0.1277, Adjusted R-squared: 0.08059
## F-statistic: 2.709 on 2 and 37 DF, p-value: 0.0798
coef(mod)
## (Intercept) kernel meshnot accessible
## 11.659782225 0.001012052 -22.092415802
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = bi_al,
x= kernel,
y = mass,
grouping.var = mesh,
xlab = "Density (kernel)",
ylab= "Mass",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Mass relative to Density (kernel), grouped by mesh size"
),
plotgrid.args = list(nrow = 4, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
mod <- lm(mass ~ kernel + mesh, data = bi_al)
summary(mod)
##
## Call:
## lm(formula = mass ~ kernel + mesh, data = bi_al)
##
## Residuals:
## Min 1Q Median 3Q Max
## -86.602 -36.676 9.515 44.097 67.980
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.135e+02 2.210e+01 5.135 9.29e-06 ***
## kernel -1.195e-03 9.027e-04 -1.324 0.1937
## meshnot accessible 4.801e+01 1.848e+01 2.598 0.0134 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 50.6 on 37 degrees of freedom
## Multiple R-squared: 0.1873, Adjusted R-squared: 0.1434
## F-statistic: 4.264 on 2 and 37 DF, p-value: 0.02155
coef(mod)
## (Intercept) kernel meshnot accessible
## 113.47210991 -0.00119485 48.01233784
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = bi_al,
x= kernel,
y = dry_mass,
grouping.var = mesh,
xlab = "Density (kernel)",
ylab= "Dry mass (g)",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Dry mass relative to Density (kernel), grouped by mesh size"
),
plotgrid.args = list(nrow = 4, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
mod <- lm(dry_mass ~ kernel + mesh, data = bi_al)
summary(mod)
##
## Call:
## lm(formula = dry_mass ~ kernel + mesh, data = bi_al)
##
## Residuals:
## Min 1Q Median 3Q Max
## -38.333 -18.535 -0.436 23.098 39.033
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 44.3437562 10.5074059 4.220 0.000152 ***
## kernel -0.0002673 0.0004292 -0.623 0.537297
## meshnot accessible 24.7803914 8.7858586 2.820 0.007663 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 24.06 on 37 degrees of freedom
## Multiple R-squared: 0.1842, Adjusted R-squared: 0.1401
## F-statistic: 4.178 on 2 and 37 DF, p-value: 0.02312
coef(mod)
## (Intercept) kernel meshnot accessible
## 44.3437561760 -0.0002672688 24.7803913758
There are two variables related to the position of the cages in the field: Pos: Relates to the area covered by susliks. Pos2: Slope of the field. 10 is lower, hence more humidity and beneficial for the alfalfa
To see the effect of Pos and Pos2 on the alfalfa success, we group data from all cages except those with small mesh, selecting only the plants without protection again susliks.
alf_unprotected <- alf_data %>%
dplyr::filter(!mesh == "small") %>%
dplyr::select(!mesh) # %>%
#dplyr::mutate(slope = as.numeric(slope))
## for reproducibility
set.seed(123)
ggbetweenstats( data = alf_unprotected, x= pos, y = height, title = "Plant height vs suslik density")
## for reproducibility
set.seed(123)
ggbetweenstats( data = alf_unprotected, x= pos, y = other_plants, title = "Other plants vs suslik density")
## for reproducibility
set.seed(123)
ggbetweenstats( data = alf_unprotected, x= pos, y = mass, title = "Mass vs suslik density")
## for reproducibility
set.seed(123)
ggbetweenstats( data = alf_unprotected,
x= pos, y = dry_mass,
pairwise.display = "s",
output = "dataframe",
title = "Dry mass vs suslik density")
## for reproducibility
set.seed(123)
ggscatterstats( data = alf_unprotected, x= slope, y = height, title = "Plant height vs slope")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## for reproducibility
set.seed(123)
ggscatterstats( data = alf_unprotected, x= slope, y = other_plants, title = "Other plants vs slope")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## for reproducibility
set.seed(123)
ggscatterstats( data = alf_unprotected, x= slope, y = mass, title = "Mass vs slope")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## for reproducibility
set.seed(123)
ggscatterstats( data = alf_unprotected, x= slope, y = dry_mass, title = "Dry mass vs slope")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Pos is basically an alternative to kernel, so we don’t use it in the same model. Pos2 (slope of the field) is different, but they are still a bit correlated (less susliks up the hill?). So probably it makes sense to test the interaction.
The additive model is better, the interaction is not significant, so kernel and pos2 are independent (I tested also the other variables and the interaction is never significant)
# Fit the linear regression model
model <- lm(height ~ kernel * slope, data = alf_unprotected)
summary(model)
##
## Call:
## lm(formula = height ~ kernel * slope, data = alf_unprotected)
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.78 -15.98 -1.73 13.34 42.99
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.187e+02 2.153e+01 5.511 8.78e-06 ***
## kernel -5.498e-03 1.159e-03 -4.742 6.64e-05 ***
## slope 5.326e+00 3.524e+00 1.511 0.143
## kernel:slope 2.260e-04 1.507e-04 1.500 0.146
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21.05 on 26 degrees of freedom
## Multiple R-squared: 0.6922, Adjusted R-squared: 0.6566
## F-statistic: 19.49 on 3 and 26 DF, p-value: 7.881e-07
# Fit the linear regression model
model <- lm(height ~ kernel + slope, data = alf_unprotected)
summary(model)
##
## Call:
## lm(formula = height ~ kernel + slope, data = alf_unprotected)
##
## Residuals:
## Min 1Q Median 3Q Max
## -40.25 -15.83 -2.86 10.98 50.02
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 90.3524940 10.5865931 8.535 3.79e-09 ***
## kernel -0.0039601 0.0005533 -7.158 1.07e-07 ***
## slope 9.9482874 1.7483852 5.690 4.81e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21.53 on 27 degrees of freedom
## Multiple R-squared: 0.6655, Adjusted R-squared: 0.6408
## F-statistic: 26.86 on 2 and 27 DF, p-value: 3.79e-07
# Fit the linear regression model
model <- lm(other_plants ~ kernel + slope, data = alf_unprotected)
summary(model)
##
## Call:
## lm(formula = other_plants ~ kernel + slope, data = alf_unprotected)
##
## Residuals:
## Min 1Q Median 3Q Max
## -50.303 -22.928 3.801 19.372 51.731
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.870994 13.987281 1.135 0.266
## kernel 0.003590 0.000731 4.912 3.86e-05 ***
## slope -11.191063 2.310012 -4.845 4.62e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 28.45 on 27 degrees of freedom
## Multiple R-squared: 0.5208, Adjusted R-squared: 0.4853
## F-statistic: 14.67 on 2 and 27 DF, p-value: 4.864e-05
# Fit the linear regression model
model <- lm(mass ~ kernel + slope, data = alf_unprotected)
summary(model)
##
## Call:
## lm(formula = mass ~ kernel + slope, data = alf_unprotected)
##
## Residuals:
## Min 1Q Median 3Q Max
## -67.543 -22.672 -5.548 19.529 84.808
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.067e+02 1.751e+01 6.092 1.66e-06 ***
## kernel -5.068e-03 9.152e-04 -5.537 7.22e-06 ***
## slope 1.689e+01 2.892e+00 5.840 3.22e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 35.62 on 27 degrees of freedom
## Multiple R-squared: 0.5971, Adjusted R-squared: 0.5673
## F-statistic: 20.01 on 2 and 27 DF, p-value: 4.672e-06
# Fit the linear regression model
model <- lm(dry_mass ~ kernel + slope, data = alf_unprotected)
summary(model)
##
## Call:
## lm(formula = dry_mass ~ kernel + slope, data = alf_unprotected)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.410 -8.685 -3.997 8.298 42.334
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 41.5055672 7.5605246 5.490 8.19e-06 ***
## kernel -0.0022248 0.0003951 -5.631 5.63e-06 ***
## slope 8.4311295 1.2486273 6.752 2.99e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.38 on 27 degrees of freedom
## Multiple R-squared: 0.6444, Adjusted R-squared: 0.6181
## F-statistic: 24.46 on 2 and 27 DF, p-value: 8.671e-07
Copied and pasted the values from the formulas from the original file (cage_experiments_results2)
wheat_data <- rio::import(here("data", "wheat2022.xlsx")) %>%
janitor::clean_names() %>%
dplyr::mutate(mesh = as.factor(mesh)) %>%
dplyr::mutate(line = as.factor(line))
head(wheat_data)
## cage mesh hole_d line n_cobs total_mass grain_mass cob_mass
## 1 A03 big 7.1 A 425 520 1.2666667 1.2235294
## 2 A06 big 6.6 A 505 720 1.2000000 1.4257426
## 3 A09 big 2.1 A 0 0 0.0000000 0.0000000
## 4 B03 big 4.2 B 371 403 0.5333333 1.0862534
## 5 B06 big 3.8 B 113 109 0.8333333 0.9646018
## 6 B09 big 4.6 B 516 718 1.1000000 1.3914729
## comment
## 1 <NA>
## 2 <NA>
## 3 eaten all; 0 is not mistake
## 4 <NA>
## 5 <NA>
## 6 <NA>
# Function to summarise
baseline_table <- function(data, variables, grouping_var) {
data %>%
group_by(!!!syms(grouping_var)) %>%
summarise(
across(
all_of(variables),
~ paste0(mean(.) %>% round(2), "(±", sd(.) %>% round(2), ")")
)
) %>% unite(
"grouping",
all_of(grouping_var)
) %>% pivot_longer(
cols = -"grouping",
names_to = "variables"
) %>% pivot_wider(
names_from = "grouping"
)
}
table_w <- baseline_table(
wheat_data,
variables = c("n_cobs", "total_mass", "grain_mass", "cob_mass"),
grouping_var = "mesh"
)
table_w <- as.data.frame(table_w)
rownames(table_w)= c("Number of cobs", "Total mass", "Grain mass", "Cob mass")
table_w
## variables big middle small
## Number of cobs n_cobs 380.44(±207.54) 283.4(±213.53) 486(±78.8)
## Total mass total_mass 572.44(±429.95) 418.3(±401.81) 741.44(±287.59)
## Grain mass grain_mass 0.98(±0.5) 0.98(±0.5) 1.27(±0.33)
## Cob mass cob_mass 1.23(±0.62) 1.15(±0.58) 1.51(±0.45)
summary_wheat <- table_w %>%
dplyr::select(-variables) %>%
kbl(caption = "Means and SD for each mesh size", col.names = c("Large", "Middle", "Small")) %>%
kable_styling(bootstrap_options = c("striped", "hover"))
save_kable(summary_wheat, here::here("output", "summary_wheat.png"))
summary_wheat
| Large | Middle | Small | |
|---|---|---|---|
| Number of cobs | 380.44(±207.54) | 283.4(±213.53) | 486(±78.8) |
| Total mass | 572.44(±429.95) | 418.3(±401.81) | 741.44(±287.59) |
| Grain mass | 0.98(±0.5) | 0.98(±0.5) | 1.27(±0.33) |
| Cob mass | 1.23(±0.62) | 1.15(±0.58) | 1.51(±0.45) |
Welch’s ANOVA with the Games-Howell post hoc test. https://statisticsbyjim.com/anova/welchs-anova-compared-to-classic-one-way-anova/
The ANOVA was significant but there were no significant pairwise contrasts (!). The Games-Howell post-hoc test involves a correction for multiple comparisons and makes the test more conservative.
ggplot(data = wheat_data, aes(x = mesh, y = total_mass, color=mesh)) +
geom_boxplot()+
scale_color_brewer(palette="Dark2") +
geom_jitter(shape=16, position=position_jitter(0.2))
# since the confidence intervals for the effect sizes are computed using
## bootstrapping, important to set a seed for reproducibility
set.seed(123)
ggbetweenstats(data = wheat_data,
x= mesh,
y = total_mass,
pairwise.display = "all",
title = "Distribution of total mass across mesh size")
## for reproducibility
set.seed(123)
ggbetweenstats(data = wheat_data,
x= mesh,
y = total_mass) %>%
extract_stats()
## $subtitle_data
## # A tibble: 1 × 14
## statistic df df.er…¹ p.value method effec…² estim…³ conf.…⁴ conf.…⁵ conf.…⁶
## <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 2.03 2 16.2 0.164 One-w… Omega2 0.0968 0.95 0 1
## # … with 4 more variables: conf.method <chr>, conf.distribution <chr>,
## # n.obs <int>, expression <list>, and abbreviated variable names ¹df.error,
## # ²effectsize, ³estimate, ⁴conf.level, ⁵conf.low, ⁶conf.high
##
## $caption_data
## # A tibble: 6 × 17
## term pd prior…¹ prior…² prior…³ bf10 method log_e…⁴ effec…⁵ estim…⁶
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl>
## 1 mu 1 cauchy 0 0.707 0.494 Bayes… -0.706 Bayesi… 0
## 2 mesh-big 0.514 cauchy 0 0.707 0.494 Bayes… -0.706 Bayesi… 0
## 3 mesh-middle 0.913 cauchy 0 0.707 0.494 Bayes… -0.706 Bayesi… 0
## 4 mesh-small 0.920 cauchy 0 0.707 0.494 Bayes… -0.706 Bayesi… 0
## 5 sig2 1 cauchy 0 0.707 0.494 Bayes… -0.706 Bayesi… 0
## 6 g_mesh 1 cauchy 0 0.707 0.494 Bayes… -0.706 Bayesi… 0
## # … with 7 more variables: std.dev <dbl>, conf.level <dbl>, conf.low <dbl>,
## # conf.high <dbl>, conf.method <chr>, n.obs <int>, expression <list>, and
## # abbreviated variable names ¹prior.distribution, ²prior.location,
## # ³prior.scale, ⁴log_e_bf10, ⁵effectsize, ⁶estimate
##
## $pairwise_comparisons_data
## # A tibble: 3 × 9
## group1 group2 statistic p.value alternative distrib…¹ p.adj…² test expression
## <chr> <chr> <dbl> <dbl> <chr> <chr> <chr> <chr> <list>
## 1 big middle -1.14 1 two.sided q Holm Game… <language>
## 2 big small 1.39 1 two.sided q Holm Game… <language>
## 3 middle small 2.87 0.410 two.sided q Holm Game… <language>
## # … with abbreviated variable names ¹distribution, ²p.adjust.method
##
## $descriptive_data
## NULL
##
## $one_sample_data
## NULL
##
## $tidy_data
## NULL
##
## $glance_data
## NULL
Welch’s ANOVA with the Games-Howell post hoc test. https://statisticsbyjim.com/anova/welchs-anova-compared-to-classic-one-way-anova/
ggplot(data = wheat_data, aes(x = mesh, y = n_cobs, color=mesh))+
geom_boxplot()+
scale_color_brewer(palette="Dark2") +
geom_jitter(shape=16, position=position_jitter(0.2))
## for reproducibility
set.seed(123)
ggbetweenstats( data = wheat_data,
x= mesh,
y = n_cobs,
pairwise.display = "all",
title = "Distribution of number of cobs across mesh mesh size")
## for reproducibility
set.seed(123)
ggbetweenstats(data = wheat_data,
x= mesh,
y = n_cobs) %>%
extract_stats()
## $subtitle_data
## # A tibble: 1 × 14
## statistic df df.er…¹ p.value method effec…² estim…³ conf.…⁴ conf.…⁵ conf.…⁶
## <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 4.28 2 13.9 0.0356 One-w… Omega2 0.280 0.95 0 1
## # … with 4 more variables: conf.method <chr>, conf.distribution <chr>,
## # n.obs <int>, expression <list>, and abbreviated variable names ¹df.error,
## # ²effectsize, ³estimate, ⁴conf.level, ⁵conf.low, ⁶conf.high
##
## $caption_data
## # A tibble: 6 × 17
## term pd prior…¹ prior…² prior…³ bf10 method log_e…⁴ effec…⁵ estim…⁶
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl>
## 1 mu 1 cauchy 0 0.707 1.18 Bayes… 0.163 Bayesi… 0.0262
## 2 mesh-big 0.516 cauchy 0 0.707 1.18 Bayes… 0.163 Bayesi… 0.0262
## 3 mesh-middle 0.966 cauchy 0 0.707 1.18 Bayes… 0.163 Bayesi… 0.0262
## 4 mesh-small 0.971 cauchy 0 0.707 1.18 Bayes… 0.163 Bayesi… 0.0262
## 5 sig2 1 cauchy 0 0.707 1.18 Bayes… 0.163 Bayesi… 0.0262
## 6 g_mesh 1 cauchy 0 0.707 1.18 Bayes… 0.163 Bayesi… 0.0262
## # … with 7 more variables: std.dev <dbl>, conf.level <dbl>, conf.low <dbl>,
## # conf.high <dbl>, conf.method <chr>, n.obs <int>, expression <list>, and
## # abbreviated variable names ¹prior.distribution, ²prior.location,
## # ³prior.scale, ⁴log_e_bf10, ⁵effectsize, ⁶estimate
##
## $pairwise_comparisons_data
## # A tibble: 3 × 9
## group1 group2 statistic p.value alternative distrib…¹ p.adj…² test expression
## <chr> <chr> <dbl> <dbl> <chr> <chr> <chr> <chr> <list>
## 1 big middle -1.42 0.728 two.sided q Holm Game… <language>
## 2 big small 2.02 0.728 two.sided q Holm Game… <language>
## 3 middle small 3.95 0.122 two.sided q Holm Game… <language>
## # … with abbreviated variable names ¹distribution, ²p.adjust.method
##
## $descriptive_data
## NULL
##
## $one_sample_data
## NULL
##
## $tidy_data
## NULL
##
## $glance_data
## NULL
Welch’s ANOVA with the Games-Howell post hoc test. https://statisticsbyjim.com/anova/welchs-anova-compared-to-classic-one-way-anova/
ggplot(data = wheat_data, aes(x = mesh, y = grain_mass, color=mesh))+
geom_boxplot()+
scale_color_brewer(palette="Dark2") +
geom_jitter(shape=16, position=position_jitter(0.2))
## for reproducibility
set.seed(123)
ggbetweenstats( data = wheat_data,
x= mesh,
y = grain_mass,
pairwise.display = "s",
title = "Distribution of grain_mass across mesh mesh size")
## for reproducibility
set.seed(123)
ggbetweenstats(data = wheat_data,
x= mesh,
y = grain_mass) %>%
extract_stats()
## $subtitle_data
## # A tibble: 1 × 14
## statistic df df.er…¹ p.value method effec…² estim…³ conf.…⁴ conf.…⁵ conf.…⁶
## <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1.58 2 16.1 0.236 One-w… Omega2 0.0574 0.95 0 1
## # … with 4 more variables: conf.method <chr>, conf.distribution <chr>,
## # n.obs <int>, expression <list>, and abbreviated variable names ¹df.error,
## # ²effectsize, ³estimate, ⁴conf.level, ⁵conf.low, ⁶conf.high
##
## $caption_data
## # A tibble: 6 × 17
## term pd prior…¹ prior…² prior…³ bf10 method log_e…⁴ effec…⁵ estim…⁶
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl>
## 1 mu 1 cauchy 0 0.707 0.350 Bayes… -1.05 Bayesi… 0
## 2 mesh-big 0.75 cauchy 0 0.707 0.350 Bayes… -1.05 Bayesi… 0
## 3 mesh-middle 0.766 cauchy 0 0.707 0.350 Bayes… -1.05 Bayesi… 0
## 4 mesh-small 0.916 cauchy 0 0.707 0.350 Bayes… -1.05 Bayesi… 0
## 5 sig2 1 cauchy 0 0.707 0.350 Bayes… -1.05 Bayesi… 0
## 6 g_mesh 1 cauchy 0 0.707 0.350 Bayes… -1.05 Bayesi… 0
## # … with 7 more variables: std.dev <dbl>, conf.level <dbl>, conf.low <dbl>,
## # conf.high <dbl>, conf.method <chr>, n.obs <int>, expression <list>, and
## # abbreviated variable names ¹prior.distribution, ²prior.location,
## # ³prior.scale, ⁴log_e_bf10, ⁵effectsize, ⁶estimate
##
## $pairwise_comparisons_data
## # A tibble: 3 × 9
## group1 group2 statistic p.value alternative distrib…¹ p.adj…² test expression
## <chr> <chr> <dbl> <dbl> <chr> <chr> <chr> <chr> <list>
## 1 big middle -0.0205 1.00 two.sided q Holm Game… <language>
## 2 big small 2.03 0.951 two.sided q Holm Game… <language>
## 3 middle small 2.12 0.951 two.sided q Holm Game… <language>
## # … with abbreviated variable names ¹distribution, ²p.adjust.method
##
## $descriptive_data
## NULL
##
## $one_sample_data
## NULL
##
## $tidy_data
## NULL
##
## $glance_data
## NULL
ggplot(data = wheat_data, aes(x = mesh, y = cob_mass, color=mesh))+
geom_boxplot()+
scale_color_brewer(palette="Dark2") +
geom_jitter(shape=16, position=position_jitter(0.2))
## for reproducibility
set.seed(123)
ggbetweenstats( data = wheat_data, x= mesh, y = cob_mass, title = "Distribution of cob mass across mesh mesh size")
## for reproducibility
set.seed(123)
ggbetweenstats(data = wheat_data,
x= mesh,
y = cob_mass) %>%
extract_stats()
## $subtitle_data
## # A tibble: 1 × 14
## statistic df df.er…¹ p.value method effec…² estim…³ conf.…⁴ conf.…⁵ conf.…⁶
## <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1.26 2 16.4 0.309 One-w… Omega2 0.0265 0.95 0 1
## # … with 4 more variables: conf.method <chr>, conf.distribution <chr>,
## # n.obs <int>, expression <list>, and abbreviated variable names ¹df.error,
## # ²effectsize, ³estimate, ⁴conf.level, ⁵conf.low, ⁶conf.high
##
## $caption_data
## # A tibble: 6 × 17
## term pd prior…¹ prior…² prior…³ bf10 method log_e…⁴ effec…⁵ estim…⁶
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl>
## 1 mu 1 cauchy 0 0.707 0.312 Bayes… -1.16 Bayesi… 0
## 2 mesh-big 0.652 cauchy 0 0.707 0.312 Bayes… -1.16 Bayesi… 0
## 3 mesh-middle 0.810 cauchy 0 0.707 0.312 Bayes… -1.16 Bayesi… 0
## 4 mesh-small 0.895 cauchy 0 0.707 0.312 Bayes… -1.16 Bayesi… 0
## 5 sig2 1 cauchy 0 0.707 0.312 Bayes… -1.16 Bayesi… 0
## 6 g_mesh 1 cauchy 0 0.707 0.312 Bayes… -1.16 Bayesi… 0
## # … with 7 more variables: std.dev <dbl>, conf.level <dbl>, conf.low <dbl>,
## # conf.high <dbl>, conf.method <chr>, n.obs <int>, expression <list>, and
## # abbreviated variable names ¹prior.distribution, ²prior.location,
## # ³prior.scale, ⁴log_e_bf10, ⁵effectsize, ⁶estimate
##
## $pairwise_comparisons_data
## # A tibble: 3 × 9
## group1 group2 statistic p.value alternative distrib…¹ p.adj…² test expression
## <chr> <chr> <dbl> <dbl> <chr> <chr> <chr> <chr> <list>
## 1 big middle -0.409 1 two.sided q Holm Game… <language>
## 2 big small 1.55 1 two.sided q Holm Game… <language>
## 3 middle small 2.15 0.925 two.sided q Holm Game… <language>
## # … with abbreviated variable names ¹distribution, ²p.adjust.method
##
## $descriptive_data
## NULL
##
## $one_sample_data
## NULL
##
## $tidy_data
## NULL
##
## $glance_data
## NULL
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = wheat_data,
x= hole_d,
y = n_cobs,
grouping.var = mesh,
xlab = "Distance to nearest hole (m)",
ylab= "Number of cobs",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Number of cobs relative to distance to nearest hole, grouped by mesh size"
),
plotgrid.args = list(nrow = 3, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
mod <- lm(n_cobs ~ hole_d + mesh, data = wheat_data)
summary(mod)
##
## Call:
## lm(formula = n_cobs ~ hole_d + mesh, data = wheat_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -301.55 -100.00 21.39 119.90 258.72
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 241.18 86.59 2.785 0.0103 *
## hole_d 28.75 13.62 2.110 0.0455 *
## meshmiddle -89.44 77.35 -1.156 0.2589
## meshsmall 95.97 79.40 1.209 0.2386
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 168.2 on 24 degrees of freedom
## Multiple R-squared: 0.3207, Adjusted R-squared: 0.2357
## F-statistic: 3.776 on 3 and 24 DF, p-value: 0.02373
coef(mod)
## (Intercept) hole_d meshmiddle meshsmall
## 241.18396 28.74643 -89.44261 95.97341
Pearson’s correlation test revealed that, across the 9 cages with big mesh size, the distance to the nearest hole (m) was positively correlated with plant height, and this effect was statistically significant. The effect size (r=0.81) was large, as per Cohen’s (1988) conventions. The Bayes Factor for the same analysis revealed that the data were 7.14 times more probable under the alternative hypothesis as compared to the null hypothesis. This can be considered as moderate evidence (Jeffreys, 1961) in favor of the alternative hypothesis (of a correlation between these two variables).
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = wheat_data,
x= hole_d,
y = total_mass,
grouping.var = mesh,
xlab = "Distance to nearest hole (m)",
ylab= "Total mass",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Total mass relative to distance to nearest hole, grouped by mesh size"
),
plotgrid.args = list(nrow = 3, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = wheat_data,
x= hole_d,
y = grain_mass,
grouping.var = mesh,
xlab = "Distance to nearest hole (m)",
ylab= "Grain mass",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Grain mass relative to distance to nearest hole, grouped by mesh size"
),
plotgrid.args = list(nrow = 3, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = wheat_data,
x= hole_d,
y = cob_mass,
grouping.var = mesh,
xlab = "Distance to nearest hole (m)",
ylab= "Cob mass (g)",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Cob mass relative to distance to nearest hole, grouped by mesh size"
),
plotgrid.args = list(nrow = 3, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
No mesh, big and middle merged, to have accessible not accessible
bi_whe <-wheat_data %>%
dplyr::mutate(mesh = recode(mesh, big = "accessible", middle = "accessible", no = "accessible", small = "not accessible"))
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = bi_whe,
x= hole_d,
y = n_cobs,
grouping.var = mesh,
xlab = "Distance to nearest hole (m)",
ylab= "Number of cobs",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Number of cobs relative to distance to nearest hole, grouped by mesh size"
),
plotgrid.args = list(nrow = 3, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
mod <- lm(n_cobs ~ hole_d + mesh, data = bi_whe)
summary(mod)
##
## Call:
## lm(formula = n_cobs ~ hole_d + mesh, data = bi_whe)
##
## Residuals:
## Min 1Q Median 3Q Max
## -321.11 -101.86 32.11 111.38 275.10
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 190.66 75.26 2.533 0.0179 *
## hole_d 29.48 13.70 2.152 0.0413 *
## meshnot accessible 142.70 68.81 2.074 0.0485 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 169.3 on 25 degrees of freedom
## Multiple R-squared: 0.2828, Adjusted R-squared: 0.2254
## F-statistic: 4.929 on 2 and 25 DF, p-value: 0.01568
coef(mod)
## (Intercept) hole_d meshnot accessible
## 190.65668 29.48012 142.70179
# Distance effect only for accessible cages
acc_wheat <- bi_whe %>%
dplyr::filter(mesh == "accessible")
mod1 <- lm(n_cobs ~ hole_d, data = acc_wheat)
summary(mod1)
##
## Call:
## lm(formula = n_cobs ~ hole_d, data = acc_wheat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -338.88 -138.98 34.81 122.12 261.06
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 79.58 100.17 0.794 0.438
## hole_d 53.09 19.38 2.739 0.014 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 180.6 on 17 degrees of freedom
## Multiple R-squared: 0.3062, Adjusted R-squared: 0.2654
## F-statistic: 7.502 on 1 and 17 DF, p-value: 0.01399
coef(mod1)
## (Intercept) hole_d
## 79.58426 53.08612
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = bi_whe,
x= hole_d,
y = total_mass,
grouping.var = mesh,
xlab = "Distance to nearest hole (m)",
ylab= "Total mass",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Total mass relative to distance to nearest hole, grouped by mesh size"
),
plotgrid.args = list(nrow = 3, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
mod <- lm(total_mass ~ hole_d + mesh, data = bi_whe)
summary(mod)
##
## Call:
## lm(formula = total_mass ~ hole_d + mesh, data = bi_whe)
##
## Residuals:
## Min 1Q Median 3Q Max
## -502.52 -286.78 -42.55 201.45 741.76
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 235.75 160.42 1.47 0.1541
## hole_d 54.31 29.20 1.86 0.0747 .
## meshnot accessible 224.46 146.68 1.53 0.1385
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 360.9 on 25 degrees of freedom
## Multiple R-squared: 0.2037, Adjusted R-squared: 0.1399
## F-statistic: 3.197 on 2 and 25 DF, p-value: 0.05805
coef(mod)
## (Intercept) hole_d meshnot accessible
## 235.75301 54.31424 224.46438
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = bi_whe,
x= hole_d,
y = grain_mass,
grouping.var = mesh,
xlab = "Distance to nearest hole (m)",
ylab= "Grain mass",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Grain mass relative to distance to nearest hole, grouped by mesh size"
),
plotgrid.args = list(nrow = 3, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
mod <- lm(grain_mass ~ hole_d + mesh, data = bi_whe)
summary(mod)
##
## Call:
## lm(formula = grain_mass ~ hole_d + mesh, data = bi_whe)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.81333 -0.26952 0.07037 0.22484 0.68600
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.67920 0.18762 3.620 0.0013 **
## hole_d 0.06387 0.03415 1.870 0.0732 .
## meshnot accessible 0.25675 0.17154 1.497 0.1470
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.422 on 25 degrees of freedom
## Multiple R-squared: 0.2018, Adjusted R-squared: 0.1379
## F-statistic: 3.16 on 2 and 25 DF, p-value: 0.05977
coef(mod)
## (Intercept) hole_d meshnot accessible
## 0.67920385 0.06387193 0.25674814
## for reproducibility
set.seed(123)
grouped_ggscatterstats( data = bi_whe,
x= hole_d,
y = cob_mass,
grouping.var = mesh,
xlab = "Distance to nearest hole (m)",
ylab= "Cob mass (g)",
# arguments relevant for ggstatsplot::combine_plots
annotation.args = list(
title = "Cob mass relative to distance to nearest hole, grouped by mesh size"
),
plotgrid.args = list(nrow = 3, ncol = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
mod <- lm(cob_mass ~ hole_d + mesh, data = bi_whe)
summary(mod)
##
## Call:
## lm(formula = cob_mass ~ hole_d + mesh, data = bi_whe)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.98665 -0.33358 -0.02138 0.38533 0.85006
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.82378 0.23178 3.554 0.00154 **
## hole_d 0.07756 0.04219 1.838 0.07796 .
## meshnot accessible 0.28486 0.21192 1.344 0.19098
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5214 on 25 degrees of freedom
## Multiple R-squared: 0.1857, Adjusted R-squared: 0.1205
## F-statistic: 2.85 on 2 and 25 DF, p-value: 0.07672
coef(mod)
## (Intercept) hole_d meshnot accessible
## 0.82377832 0.07755616 0.28485542