macroinvertebrate figures

# libraries 
library(ggplot2)
library(readr)


results <- read.csv("real_macro_results.V1.csv")
View(results)

#summary stats
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
sum1 <- summarySE(data = results, groupvars = "leaf_species", measurevar = "total_count", na.rm = TRUE)
sum1
##   leaf_species N total_count        sd       se        ci
## 1        holly 9    20.88889 13.261264 4.420421 10.193510
## 2     sycamore 9    14.44444  6.267199 2.089066  4.817395
# plot
p1 <- ggplot(sum1, aes(x = leaf_species, y = total_count, fill = leaf_species)) + geom_col() + geom_errorbar(aes(ymin = total_count - se, ymax = total_count + se), size = 0.8, width = 0.5) + 
# theme
  theme_classic() + 
# labels
  labs(title = "Macroinvertebrate Density by Leaf Species", 
       x = "Leaf Species", 
       y = "Number of Macroinvertebrates Present")
p1

# t-test

ttest1 <- t.test(results $ total_count ~ results $ leaf_species)
ttest1
## 
##  Welch Two Sample t-test
## 
## data:  results$total_count by results$leaf_species
## t = 1.3181, df = 11.404, p-value = 0.2133
## alternative hypothesis: true difference in means between group holly and group sycamore is not equal to 0
## 95 percent confidence interval:
##  -4.270322 17.159211
## sample estimates:
##    mean in group holly mean in group sycamore 
##               20.88889               14.44444
# plot 2 # plot 2 TRUE
sum2 <- summarySE(data = results, groupvars = c("leaf_species", "site"), measurevar = "total_count", na.rm = TRUE)
sum2
##   leaf_species site N total_count       sd        se        ci
## 1        holly    1 3    37.00000 2.000000 1.1547005  4.968275
## 2        holly    2 3    15.66667 7.234178 4.1766547 17.970695
## 3        holly    3 0         NaN       NA        NA       NaN
## 4        holly    4 3    10.00000 6.244998 3.6055513 15.513435
## 5     sycamore    1 3    14.66667 1.527525 0.8819171  3.794583
## 6     sycamore    2 3    16.33333 7.094599 4.0960686 17.623961
## 7     sycamore    3 0         NaN       NA        NA       NaN
## 8     sycamore    4 3    12.33333 9.609024 5.5477723 23.870138
sum2_clean <- na.omit(sum2)
sum2_clean
##   leaf_species site N total_count       sd        se        ci
## 1        holly    1 3    37.00000 2.000000 1.1547005  4.968275
## 2        holly    2 3    15.66667 7.234178 4.1766547 17.970695
## 4        holly    4 3    10.00000 6.244998 3.6055513 15.513435
## 5     sycamore    1 3    14.66667 1.527525 0.8819171  3.794583
## 6     sycamore    2 3    16.33333 7.094599 4.0960686 17.623961
## 8     sycamore    4 3    12.33333 9.609024 5.5477723 23.870138
# make graph 


p2 <- ggplot(sum2_clean, aes(x = factor(site), y = total_count, fill = leaf_species)) +
  geom_col(position = position_dodge(width = 1), width = 0.8) +
#error bar
  geom_errorbar(aes(ymin = total_count - se, ymax = total_count + se),
                position = position_dodge(width = 1), width = 0.4, size = 0.5) +
# theme
  theme_classic() +
# labels
  labs(title = "Macroinvertebrate Density by Site and Leaf Species",
       x = "Site",
       y = "Total Macroinvertebrate Count",
       fill = "Leaf Species") 
p2

# ANOVA 
holly_data <- subset(results, leaf_species == "holly")
anova_holly <- aov(total_count ~ as.factor(site), data = holly_data)
summary(anova_holly) # anova for holly sites 
##                 Df Sum Sq Mean Sq F value  Pr(>F)   
## as.factor(site)  2 1216.2   608.1   19.14 0.00249 **
## Residuals        6  190.7    31.8                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 3 observations deleted due to missingness
# tukey test for holly: 
TukeyHSD(anova_holly)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = total_count ~ as.factor(site), data = holly_data)
## 
## $`as.factor(site)`
##           diff       lwr        upr     p adj
## 2-1 -21.333333 -35.45579  -7.210874 0.0084812
## 4-1 -27.000000 -41.12246 -12.877541 0.0026269
## 4-2  -5.666667 -19.78913   8.455793 0.4794759
sycamore_data <- subset(results, leaf_species == "sycamore")
anova_sycamore <- aov(total_count ~ as.factor(site), data = sycamore_data)
summary(anova_sycamore) # anova for sycamore sites
##                 Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(site)  2  24.22   12.11   0.251  0.786
## Residuals        6 290.00   48.33               
## 3 observations deleted due to missingness
# no significant different for sycamore, no need for a tukey test 

  
  # graph 3 
sum3 <- summarySE(data = results, groupvars = "site", measurevar = "total_count", na.rm = TRUE)
sum3
##   site N total_count        sd       se        ci
## 1    1 6    25.83333 12.335585 5.035982 12.945403
## 2    2 6    16.00000  6.418723 2.620433  6.736036
## 3    3 0         NaN        NA       NA       NaN
## 4    4 6    11.16667  7.359801 3.004626  7.723637
sum3_clean = na.omit(sum3)
sum3_clean
##   site N total_count        sd       se        ci
## 1    1 6    25.83333 12.335585 5.035982 12.945403
## 2    2 6    16.00000  6.418723 2.620433  6.736036
## 4    4 6    11.16667  7.359801 3.004626  7.723637
p3 <- ggplot(sum3_clean, aes(x = factor(site), y = total_count)) + geom_col(aes(fill = c("darkblue", "blue", "lightblue2"))) + geom_errorbar(aes(ymin = total_count - se, ymax = total_count + se), size = 0.6, width = 0.5) + 
  theme_classic() + 
  theme(legend.position = "none") + 
# labels 
  labs(title = "Total Macroinvertebrate Density by Site", 
       x = "Site", 
       y = "NUmber of Macroinvertebrates") 
p3

# ANOVA
anova_all_sites <- aov(total_count ~ as.factor(site), data = results)
summary(anova_all_sites)
##                 Df Sum Sq Mean Sq F value Pr(>F)  
## as.factor(site)  2  670.3   335.2   4.062 0.0389 *
## Residuals       15 1237.7    82.5                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 6 observations deleted due to missingness
# significance 
TukeyHSD(anova_all_sites)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = total_count ~ as.factor(site), data = results)
## 
## $`as.factor(site)`
##           diff       lwr       upr     p adj
## 2-1  -9.833333 -23.45550  3.788833 0.1801116
## 4-1 -14.666667 -28.28883 -1.044501 0.0341683
## 4-2  -4.833333 -18.45550  8.788833 0.6354535

multipanel plot

# lib
library(cowplot) #plot_grid()

# Load the necessary library
library(cowplot)

# Create the multipanel figure
multi_plot1 <- plot_grid(
  p1 + theme(plot.title = element_blank(), axis.title.x = element_blank(), axis.title.y = element_blank()),
  p2 + theme(plot.title = element_blank(), axis.title.x = element_blank(), axis.title.y = element_blank()), 
  p3 + theme(plot.title = element_blank(), axis.title.x = element_blank(), axis.title.y = element_blank()),
  ncol = 1,                    # number of columns
  labels = c("A", "B", "C"),   # panel labels
  label_size = 10,
  label_y = c(1.05, 1.1, 1.1),
  align = 'v'                  # align vertically
)

# Display the plot
multi_plot1

# Create the multipanel figure
multi_plot2 <- plot_grid(
  p1 + theme(plot.title = element_blank(), axis.title.x = element_blank(), axis.title.y = element_blank()),
  p3 + theme(plot.title = element_blank(), axis.title.x = element_blank(), axis.title.y = element_blank()), 
  p2 + theme(plot.title = element_blank(), axis.title.x = element_blank(), axis.title.y = element_blank()),
  ncol = 2,                    # number of columns
  labels = c("A", "B", "C"),   # panel labels
  label_size = 10,
  label_y = c(1.03, 1.03, 1.09),
  align = 'v'                  # align vertically
)

# Display the plot
multi_plot2

# Save the multi-panel plot as a .PNG

ggsave(
  filename = "macroinvertebrate_multipanel_plot.png",  # filename
  plot = multi_plot2,                                   # the plot object
  width = 10,                                          # width in inches
  height = 12,                                         # height in inches
  dpi = 300                                            # resolution (300 dpi is standard for publication)
)

plot1 and statistical test

p1

ttest1
## 
##  Welch Two Sample t-test
## 
## data:  results$total_count by results$leaf_species
## t = 1.3181, df = 11.404, p-value = 0.2133
## alternative hypothesis: true difference in means between group holly and group sycamore is not equal to 0
## 95 percent confidence interval:
##  -4.270322 17.159211
## sample estimates:
##    mean in group holly mean in group sycamore 
##               20.88889               14.44444

plot2 and statistical test

p2

summary(anova_holly)
##                 Df Sum Sq Mean Sq F value  Pr(>F)   
## as.factor(site)  2 1216.2   608.1   19.14 0.00249 **
## Residuals        6  190.7    31.8                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 3 observations deleted due to missingness
TukeyHSD(anova_holly)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = total_count ~ as.factor(site), data = holly_data)
## 
## $`as.factor(site)`
##           diff       lwr        upr     p adj
## 2-1 -21.333333 -35.45579  -7.210874 0.0084812
## 4-1 -27.000000 -41.12246 -12.877541 0.0026269
## 4-2  -5.666667 -19.78913   8.455793 0.4794759
summary(anova_sycamore) # anova for sycamore sites
##                 Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(site)  2  24.22   12.11   0.251  0.786
## Residuals        6 290.00   48.33               
## 3 observations deleted due to missingness
# no significant different for sycamore, no need for a tukey test 

plot3 and statistical test

p3

summary(anova_all_sites)
##                 Df Sum Sq Mean Sq F value Pr(>F)  
## as.factor(site)  2  670.3   335.2   4.062 0.0389 *
## Residuals       15 1237.7    82.5                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 6 observations deleted due to missingness
# significance 
TukeyHSD(anova_all_sites)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = total_count ~ as.factor(site), data = results)
## 
## $`as.factor(site)`
##           diff       lwr       upr     p adj
## 2-1  -9.833333 -23.45550  3.788833 0.1801116
## 4-1 -14.666667 -28.28883 -1.044501 0.0341683
## 4-2  -4.833333 -18.45550  8.788833 0.6354535

ALL T and ANOVA tests

ttest1
## 
##  Welch Two Sample t-test
## 
## data:  results$total_count by results$leaf_species
## t = 1.3181, df = 11.404, p-value = 0.2133
## alternative hypothesis: true difference in means between group holly and group sycamore is not equal to 0
## 95 percent confidence interval:
##  -4.270322 17.159211
## sample estimates:
##    mean in group holly mean in group sycamore 
##               20.88889               14.44444
summary(anova_holly)
##                 Df Sum Sq Mean Sq F value  Pr(>F)   
## as.factor(site)  2 1216.2   608.1   19.14 0.00249 **
## Residuals        6  190.7    31.8                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 3 observations deleted due to missingness
TukeyHSD(anova_holly)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = total_count ~ as.factor(site), data = holly_data)
## 
## $`as.factor(site)`
##           diff       lwr        upr     p adj
## 2-1 -21.333333 -35.45579  -7.210874 0.0084812
## 4-1 -27.000000 -41.12246 -12.877541 0.0026269
## 4-2  -5.666667 -19.78913   8.455793 0.4794759
summary(anova_sycamore) # anova for sycamore sites
##                 Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(site)  2  24.22   12.11   0.251  0.786
## Residuals        6 290.00   48.33               
## 3 observations deleted due to missingness
# no significant different for sycamore, no need for a tukey test 

summary(anova_all_sites)
##                 Df Sum Sq Mean Sq F value Pr(>F)  
## as.factor(site)  2  670.3   335.2   4.062 0.0389 *
## Residuals       15 1237.7    82.5                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 6 observations deleted due to missingness
# significance 
TukeyHSD(anova_all_sites)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = total_count ~ as.factor(site), data = results)
## 
## $`as.factor(site)`
##           diff       lwr       upr     p adj
## 2-1  -9.833333 -23.45550  3.788833 0.1801116
## 4-1 -14.666667 -28.28883 -1.044501 0.0341683
## 4-2  -4.833333 -18.45550  8.788833 0.6354535