Introduction

Dung beetles are nature’s recyclers. They bury and feed on animal dung with nutrient cycling and improve soil health. In this example, we’ll stimulate data to test if more cow dung attracts more beetles, species richness (number of species) and abundance (total number of beetles collected).

We’ll test if the differences are statistically significant and visualize the results.

Step 1: Load libraries

We’ll use ‘tidyverse’ for data wrangling and plotting.

library(tidyverse) #preparing data for analysis
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2) #creating graphics
library(dplyr) #for data manipulation, cleaning, and transformation

Step 2: Read the dataset

Make sure the file dung_beetle_feeding.csv is saved in your working directory.

# Read CSV file
beetle_data <- read_csv("dung_beetle_feeding.csv")
## Rows: 20 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): Dung_pat_g, Abundance, Richness
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the data
head(beetle_data)
## # A tibble: 6 × 3
##   Dung_pat_g Abundance Richness
##        <dbl>     <dbl>    <dbl>
## 1         50        15        7
## 2         50        17        3
## 3         50        13        5
## 4         50        20        6
## 5         50        16        4
## 6        100        19        8
str(beetle_data)
## spc_tbl_ [20 × 3] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Dung_pat_g: num [1:20] 50 50 50 50 50 100 100 100 100 100 ...
##  $ Abundance : num [1:20] 15 17 13 20 16 19 21 18 25 23 ...
##  $ Richness  : num [1:20] 7 3 5 6 4 8 10 9 6 7 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Dung_pat_g = col_double(),
##   ..   Abundance = col_double(),
##   ..   Richness = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

Step 3: Summary statistics

Quick overview of our field data

summary(beetle_data)
##    Dung_pat_g      Abundance        Richness    
##  Min.   : 50.0   Min.   :13.00   Min.   : 3.00  
##  1st Qu.: 87.5   1st Qu.:18.75   1st Qu.: 6.75  
##  Median :125.0   Median :26.00   Median : 9.00  
##  Mean   :125.0   Mean   :28.75   Mean   : 9.15  
##  3rd Qu.:162.5   3rd Qu.:38.25   3rd Qu.:11.25  
##  Max.   :200.0   Max.   :52.00   Max.   :16.00
## Use a histogram to visualize the spread of the data
hist(beetle_data$Abundance)

hist(beetle_data$Richness)

Step 4: Data visualization

# Species Richness plot
ggplot(beetle_data, aes(x = as.factor(Dung_pat_g), y = Richness)) +
  geom_boxplot(fill = "lightgreen") +
    geom_point(colour = "gray30", size = 2, alpha = 0.5) +
  labs(x = "Amount of dung (g)", y = "Species Richness",
       title = "Dung Beetle Species Richness by Dung Amount") +
  theme_classic()

# Abundance plot
ggplot(beetle_data, aes(x = as.factor(Dung_pat_g), y = Abundance)) +
  geom_boxplot(fill = "lightblue") +
  geom_point(colour = "gray30", size = 2, alpha = 0.5) +
  labs(x = "Amount of dung (g)", y = "Abundance",
       title = "Dung Beetle Abundance by Dung Amount") +
  theme_classic()

Step 5: Plot the means and confidence intervals

#Use Ctrl + Shift + M to make a tydiverse pipe operator ( %>% )
#The native R pipe operator is ( |> )

# Richness
beetle_summary_richness <-
  beetle_data %>% 
  group_by(Dung_pat_g) %>%
  summarise(count = n(),
               mean = mean(Richness, na.rm = TRUE),
               ssd = sd(Richness, na.rm = TRUE)) %>%
     mutate(se = ssd / sqrt(count),
            lower_ci = mean - qt(1 - (0.05 / 2), count - 1) * se,
            upper_ci = mean + qt(1 - (0.05 / 2), count - 1) * se)


# Nice table output
knitr::kable(beetle_summary_richness)
## Warning in attr(x, "align"): 'xfun::attr()' is deprecated.
## Use 'xfun::attr2()' instead.
## See help("Deprecated")
## Warning in attr(x, "format"): 'xfun::attr()' is deprecated.
## Use 'xfun::attr2()' instead.
## See help("Deprecated")
Dung_pat_g count mean ssd se lower_ci upper_ci
50 5 5.0 1.581139 0.7071068 3.036757 6.963243
100 5 8.0 1.581139 0.7071068 6.036757 9.963243
150 5 10.4 2.302173 1.0295630 7.541475 13.258525
200 5 13.2 1.923538 0.8602325 10.811612 15.588388
# Abundance
beetle_summary_abundance <-
  beetle_data %>% 
  group_by(Dung_pat_g) %>%
  summarise(count = n(),
               mean = mean(Abundance, na.rm = TRUE),
               ssd = sd(Abundance, na.rm = TRUE)) %>%
     mutate(se = ssd / sqrt(count),
            lower_ci = mean - qt(1 - (0.05 / 2), count - 1) * se,
            upper_ci = mean + qt(1 - (0.05 / 2), count - 1) * se)


# Nice table output
knitr::kable(beetle_summary_abundance)
## Warning in attr(x, "align"): 'xfun::attr()' is deprecated.
## Use 'xfun::attr2()' instead.
## See help("Deprecated")
## Warning in attr(x, "align"): 'xfun::attr()' is deprecated.
## Use 'xfun::attr2()' instead.
## See help("Deprecated")
Dung_pat_g count mean ssd se lower_ci upper_ci
50 5 16.2 2.588436 1.157584 12.98603 19.41397
100 5 21.2 2.863564 1.280625 17.64442 24.75558
150 5 32.2 4.438468 1.984943 26.68891 37.71109
200 5 45.4 5.412947 2.420744 38.67894 52.12106

Instead of a boxplot (which shows the median and quartiles), we plot the means and confidence intervals:

# Richness
beetle_summary_richness %>% ggplot() +
                geom_point(aes(x = Dung_pat_g, y = mean),
                           colour = "lightgreen", size = 3) +
                geom_errorbar(aes(x = Dung_pat_g,
                                  ymin = lower_ci,
                                  ymax = upper_ci),colour = "lightgreen",
                              width = 3, size = 1) +
                labs(x = "Amount of dung (g)", y = "Richness") +
                theme_classic() +
                theme(legend.position = "none")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Abundance
beetle_summary_abundance %>% ggplot() +
                geom_point(aes(x = Dung_pat_g, y = mean),
                           colour = "lightblue", size = 3) +
                geom_errorbar(aes(x = Dung_pat_g,
                                  ymin = lower_ci,
                                  ymax = upper_ci),colour = "lightblue",
                              width = 3, size = 1) +
                labs(x = "Amount of dung (g)", y = "Abundance") +
                theme_classic() +
                theme(legend.position = "none")

Step 6: Interpretation

  • Look for patterns: Does richness or abundance increase with dung amount?

  • Are the differences statistically significant?

  • Consider ecological explanations.

In real ecology study, what we might also measure?

Step 7: Save and share

You can knit this .Rmd to:

  • HTML for interactive viewing

  • PDF for printing

  • Word for editing

This makes it easy to share your code, data, and results with your supervisor — all in one reproducible report.