macroinvertebrate figures
# libraries
library(ggplot2)
library(readr)
results <- read.csv("real_macro_results.V1.csv")
View(results)
#summary stats
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
sum1 <- summarySE(data = results, groupvars = "leaf_species", measurevar = "total_count", na.rm = TRUE)
sum1
## leaf_species N total_count sd se ci
## 1 holly 9 20.88889 13.261264 4.420421 10.193510
## 2 sycamore 9 14.44444 6.267199 2.089066 4.817395
# plot
p1 <- ggplot(sum1, aes(x = leaf_species, y = total_count, fill = leaf_species)) + geom_col() + geom_errorbar(aes(ymin = total_count - se, ymax = total_count + se), size = 0.8, width = 0.5) +
# theme
theme_classic() +
# labels
labs(title = "Macroinvertebrate Density by Leaf Species",
x = "Leaf Species",
y = "Number of Macroinvertebrates Present")
p1

# t-test
ttest1 <- t.test(results $ total_count ~ results $ leaf_species)
ttest1
##
## Welch Two Sample t-test
##
## data: results$total_count by results$leaf_species
## t = 1.3181, df = 11.404, p-value = 0.2133
## alternative hypothesis: true difference in means between group holly and group sycamore is not equal to 0
## 95 percent confidence interval:
## -4.270322 17.159211
## sample estimates:
## mean in group holly mean in group sycamore
## 20.88889 14.44444
# plot 2 # plot 2 TRUE
sum2 <- summarySE(data = results, groupvars = c("leaf_species", "site"), measurevar = "total_count", na.rm = TRUE)
sum2
## leaf_species site N total_count sd se ci
## 1 holly 1 3 37.00000 2.000000 1.1547005 4.968275
## 2 holly 2 3 15.66667 7.234178 4.1766547 17.970695
## 3 holly 3 0 NaN NA NA NaN
## 4 holly 4 3 10.00000 6.244998 3.6055513 15.513435
## 5 sycamore 1 3 14.66667 1.527525 0.8819171 3.794583
## 6 sycamore 2 3 16.33333 7.094599 4.0960686 17.623961
## 7 sycamore 3 0 NaN NA NA NaN
## 8 sycamore 4 3 12.33333 9.609024 5.5477723 23.870138
sum2_clean <- na.omit(sum2)
sum2_clean
## leaf_species site N total_count sd se ci
## 1 holly 1 3 37.00000 2.000000 1.1547005 4.968275
## 2 holly 2 3 15.66667 7.234178 4.1766547 17.970695
## 4 holly 4 3 10.00000 6.244998 3.6055513 15.513435
## 5 sycamore 1 3 14.66667 1.527525 0.8819171 3.794583
## 6 sycamore 2 3 16.33333 7.094599 4.0960686 17.623961
## 8 sycamore 4 3 12.33333 9.609024 5.5477723 23.870138
# make graph
p2 <- ggplot(sum2_clean, aes(x = factor(site), y = total_count, fill = leaf_species)) +
geom_col(position = position_dodge(width = 1), width = 0.8) +
#error bar
geom_errorbar(aes(ymin = total_count - se, ymax = total_count + se),
position = position_dodge(width = 1), width = 0.4, size = 0.5) +
# theme
theme_classic() +
# labels
labs(title = "Macroinvertebrate Density by Site and Leaf Species",
x = "Site",
y = "Total Macroinvertebrate Count",
fill = "Leaf Species")
p2

# ANOVA
holly_data <- subset(results, leaf_species == "holly")
anova_holly <- aov(total_count ~ as.factor(site), data = holly_data)
summary(anova_holly) # anova for holly sites
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(site) 2 1216.2 608.1 19.14 0.00249 **
## Residuals 6 190.7 31.8
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 3 observations deleted due to missingness
# tukey test for holly:
TukeyHSD(anova_holly)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = total_count ~ as.factor(site), data = holly_data)
##
## $`as.factor(site)`
## diff lwr upr p adj
## 2-1 -21.333333 -35.45579 -7.210874 0.0084812
## 4-1 -27.000000 -41.12246 -12.877541 0.0026269
## 4-2 -5.666667 -19.78913 8.455793 0.4794759
sycamore_data <- subset(results, leaf_species == "sycamore")
anova_sycamore <- aov(total_count ~ as.factor(site), data = sycamore_data)
summary(anova_sycamore) # anova for sycamore sites
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(site) 2 24.22 12.11 0.251 0.786
## Residuals 6 290.00 48.33
## 3 observations deleted due to missingness
# no significant different for sycamore, no need for a tukey test
# graph 3
sum3 <- summarySE(data = results, groupvars = "site", measurevar = "total_count", na.rm = TRUE)
sum3
## site N total_count sd se ci
## 1 1 6 25.83333 12.335585 5.035982 12.945403
## 2 2 6 16.00000 6.418723 2.620433 6.736036
## 3 3 0 NaN NA NA NaN
## 4 4 6 11.16667 7.359801 3.004626 7.723637
sum3_clean = na.omit(sum3)
sum3_clean
## site N total_count sd se ci
## 1 1 6 25.83333 12.335585 5.035982 12.945403
## 2 2 6 16.00000 6.418723 2.620433 6.736036
## 4 4 6 11.16667 7.359801 3.004626 7.723637
p3 <- ggplot(sum3_clean, aes(x = factor(site), y = total_count)) + geom_col(aes(fill = c("darkblue", "blue", "lightblue2"))) + geom_errorbar(aes(ymin = total_count - se, ymax = total_count + se), size = 0.6, width = 0.5) +
theme_classic() +
theme(legend.position = "none") +
# labels
labs(title = "Total Macroinvertebrate Density by Site",
x = "Site",
y = "NUmber of Macroinvertebrates")
p3

# ANOVA
anova_all_sites <- aov(total_count ~ as.factor(site), data = results)
summary(anova_all_sites)
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(site) 2 670.3 335.2 4.062 0.0389 *
## Residuals 15 1237.7 82.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 6 observations deleted due to missingness
# significance
TukeyHSD(anova_all_sites)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = total_count ~ as.factor(site), data = results)
##
## $`as.factor(site)`
## diff lwr upr p adj
## 2-1 -9.833333 -23.45550 3.788833 0.1801116
## 4-1 -14.666667 -28.28883 -1.044501 0.0341683
## 4-2 -4.833333 -18.45550 8.788833 0.6354535
multipanel plot
# lib
library(cowplot) #plot_grid()
# Load the necessary library
library(cowplot)
# Create the multipanel figure
multi_plot1 <- plot_grid(
p1 + theme(plot.title = element_blank(), axis.title.x = element_blank(), axis.title.y = element_blank()),
p2 + theme(plot.title = element_blank(), axis.title.x = element_blank(), axis.title.y = element_blank()),
p3 + theme(plot.title = element_blank(), axis.title.x = element_blank(), axis.title.y = element_blank()),
ncol = 1, # number of columns
labels = c("A", "B", "C"), # panel labels
label_size = 10,
label_y = c(1.05, 1.1, 1.1),
align = 'v' # align vertically
)
# Display the plot
multi_plot1

# Create the multipanel figure
multi_plot2 <- plot_grid(
p1 + theme(plot.title = element_blank(), axis.title.x = element_blank(), axis.title.y = element_blank()),
p3 + theme(plot.title = element_blank(), axis.title.x = element_blank(), axis.title.y = element_blank()),
p2 + theme(plot.title = element_blank(), axis.title.x = element_blank(), axis.title.y = element_blank()),
ncol = 2, # number of columns
labels = c("A", "B", "C"), # panel labels
label_size = 10,
label_y = c(1.03, 1.03, 1.09),
align = 'v' # align vertically
)
# Display the plot
multi_plot2

# Save the multi-panel plot as a .PNG
ggsave(
filename = "macroinvertebrate_multipanel_plot.png", # filename
plot = multi_plot2, # the plot object
width = 10, # width in inches
height = 12, # height in inches
dpi = 300 # resolution (300 dpi is standard for publication)
)
plot1 and statistical test
p1

ttest1
##
## Welch Two Sample t-test
##
## data: results$total_count by results$leaf_species
## t = 1.3181, df = 11.404, p-value = 0.2133
## alternative hypothesis: true difference in means between group holly and group sycamore is not equal to 0
## 95 percent confidence interval:
## -4.270322 17.159211
## sample estimates:
## mean in group holly mean in group sycamore
## 20.88889 14.44444
plot2 and statistical test
p2

summary(anova_holly)
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(site) 2 1216.2 608.1 19.14 0.00249 **
## Residuals 6 190.7 31.8
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 3 observations deleted due to missingness
TukeyHSD(anova_holly)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = total_count ~ as.factor(site), data = holly_data)
##
## $`as.factor(site)`
## diff lwr upr p adj
## 2-1 -21.333333 -35.45579 -7.210874 0.0084812
## 4-1 -27.000000 -41.12246 -12.877541 0.0026269
## 4-2 -5.666667 -19.78913 8.455793 0.4794759
summary(anova_sycamore) # anova for sycamore sites
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(site) 2 24.22 12.11 0.251 0.786
## Residuals 6 290.00 48.33
## 3 observations deleted due to missingness
# no significant different for sycamore, no need for a tukey test
plot3 and statistical test
p3

summary(anova_all_sites)
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(site) 2 670.3 335.2 4.062 0.0389 *
## Residuals 15 1237.7 82.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 6 observations deleted due to missingness
# significance
TukeyHSD(anova_all_sites)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = total_count ~ as.factor(site), data = results)
##
## $`as.factor(site)`
## diff lwr upr p adj
## 2-1 -9.833333 -23.45550 3.788833 0.1801116
## 4-1 -14.666667 -28.28883 -1.044501 0.0341683
## 4-2 -4.833333 -18.45550 8.788833 0.6354535
ALL T and ANOVA tests
ttest1
##
## Welch Two Sample t-test
##
## data: results$total_count by results$leaf_species
## t = 1.3181, df = 11.404, p-value = 0.2133
## alternative hypothesis: true difference in means between group holly and group sycamore is not equal to 0
## 95 percent confidence interval:
## -4.270322 17.159211
## sample estimates:
## mean in group holly mean in group sycamore
## 20.88889 14.44444
summary(anova_holly)
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(site) 2 1216.2 608.1 19.14 0.00249 **
## Residuals 6 190.7 31.8
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 3 observations deleted due to missingness
TukeyHSD(anova_holly)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = total_count ~ as.factor(site), data = holly_data)
##
## $`as.factor(site)`
## diff lwr upr p adj
## 2-1 -21.333333 -35.45579 -7.210874 0.0084812
## 4-1 -27.000000 -41.12246 -12.877541 0.0026269
## 4-2 -5.666667 -19.78913 8.455793 0.4794759
summary(anova_sycamore) # anova for sycamore sites
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(site) 2 24.22 12.11 0.251 0.786
## Residuals 6 290.00 48.33
## 3 observations deleted due to missingness
# no significant different for sycamore, no need for a tukey test
summary(anova_all_sites)
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(site) 2 670.3 335.2 4.062 0.0389 *
## Residuals 15 1237.7 82.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 6 observations deleted due to missingness
# significance
TukeyHSD(anova_all_sites)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = total_count ~ as.factor(site), data = results)
##
## $`as.factor(site)`
## diff lwr upr p adj
## 2-1 -9.833333 -23.45550 3.788833 0.1801116
## 4-1 -14.666667 -28.28883 -1.044501 0.0341683
## 4-2 -4.833333 -18.45550 8.788833 0.6354535