Zooplankton Lab
Objectives
- Tidy messy dataframe so all variables are columns
- Join plankton and water quality dataframes together
- Produce a bar graph with the results of a means test of the abundance of each of the plankton groups
- Produce a bar graph with the results of a means test of species richness by “station”
- Produce a scatterplot graph with the results of a correlation test between abundance and salinity
- Produce a scatterplot graph with the results of a correlation test between species richness and salinity
Loading my functions
ggdynamiteplot makes means plots with error bars for sd
or se. It makes summary statistics inside the function, making dynamite
plots easier and faster to produce. theme_manuscript is a
themeing function which alters the appearance of the figures to meet the
requirements of the professor.
library(ggplot2)
ggdynamiteplot <- function(data, x, y, error = c("sd", "se"), width = 0.5) {
if (!(error %in% c("sd", "se"))) {
stop("Error: Error parameter must be either 'sd' or 'se'")
}
if (error == "sd") {
summary_stats <- data %>%
dplyr::group_by({{x}}) %>%
dplyr::summarize(
mean_y = mean({{y}}, na.rm = TRUE),
error_y = sd({{y}}, na.rm = TRUE)
) %>%
arrange(desc(mean_y))
} else if (error == "se") {
summary_stats <- data %>%
dplyr::group_by({{x}}) %>%
dplyr::summarize(
mean_y = mean({{y}}, na.rm = TRUE),
error_y = sd({{y}}, na.rm = TRUE) / sqrt(dplyr::n())
) %>%
arrange(desc(mean_y))
}
summary_stats <- summary_stats %>%
mutate({{x}} := factor({{x}}, levels = {{x}}[order(mean_y, decreasing = TRUE)]))
plot <- ggplot2::ggplot(summary_stats, ggplot2::aes(x = {{x}}, y = mean_y)) +
ggplot2::geom_bar(
stat = "identity",
position = "identity",
fill = "gray70",
width = width
) +
ggplot2::geom_errorbar(ggplot2::aes(ymin = mean_y - error_y, ymax = mean_y + error_y),
width = 0.2, color = "black", linewidth = 0.5) +
ggplot2::scale_y_continuous(expand = c(0,0)) +
ggplot2::labs(y = as_label(ensym(y)))
return(plot)
}
theme_manuscript =
function(font = NA, base_size = 12){
theme(
#Text font
text = element_text(family = font, size = base_size),
#Color
line = element_line(color = "black"),
panel.background = element_rect(fill = "white"), #fill plot background
legend.key = element_rect(fill = "white"), #fill background of legend key
axis.line = element_line(color = "black"), #color axes lines
#Positioning
legend.position = "right", #change legend position
#options: none, left, right, bottom, top
#Text sizes and options
plot.title = element_text(
size = base_size*1.3, #title size
face = "plain"), #options: plain, italic, bold, bold.italic
axis.title = element_text(size = base_size), #axis titles text size
axis.text = element_text(size = base_size, color = "black"), #axis labels/tick numbers text size
legend.text = element_text(size = base_size), #legend text size
legend.title = element_text(size = base_size) #legend title text size
)
}Importing data
These data are results from zooplankton counts we did in lab under dissection microscope. When the plankton were collected, water quality data were also collected for each water sample (temperature, salinity, and DO). The objectives of this lab were to identify differences in abundance among plankton groups and among stations. Additionally, to see if there was a correlation between salinity and species abundance or richness.
df_raw = read.csv("plankton1.csv", strip.white = TRUE, check.names = FALSE)
glimpse(df_raw)## Rows: 27
## Columns: 26
## $ Station <chr> "OUT", "OUT", "OUT", "OUT", "OUT", "OUT", …
## $ Sample <int> 1, 1, 1, 7, 7, 7, 13, 13, 13, 5, 5, 5, 9, …
## $ Chaetognath <int> 0, 1, 5, 0, 0, 0, 1, 4, 3, 8, 77, 161, 0, …
## $ Copepod <int> 141, 99, 133, 169, 305, 452, 214, 208, 272…
## $ `Cope. Nauplii` <int> 0, 0, 1, 1, 0, 0, 3, 0, 4, 10, 7, 8, 8, 3,…
## $ Lucifer <int> 1, 0, 0, 2, 2, 3, 8, 17, 3, 2, 2, 8, 3, 5,…
## $ Mysid <int> 0, 0, 0, 3, 5, 11, 79, 29, 19, 1, 4, 7, 20…
## $ `Crab Zoea` <int> 1, 1, 0, 0, 2, 2, 3, 17, 59, 2, 5, 4, 3, 8…
## $ `Crab Megalops` <int> 1, 0, 0, 4, 4, 17, 16, 26, 30, 0, 1, 0, 1,…
## $ `Fish larvae` <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Eggs <int> 0, 0, 4, 18, 33, 50, 6, 10, 9, 8, 26, 9, 4…
## $ `Unknown Gastropod Larvae` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, …
## $ `Unknown Cephalopod Larvae` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Amphipods <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `Cnidarian Larvae` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Sacphopod <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Salp <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `Segmented Worm Larvae` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, …
## $ Ostrocod <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 47, 0, …
## $ Isopod <int> 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ `Brittle star` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `Polychaete larvae` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Laravcean <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `Unknown Crustcean` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Jelly <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 25, 41, 0, …
## $ Snail <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
wq = read.csv("plankton_wq.csv", strip.white = TRUE)
wq <- dplyr::select(wq, -Station)
head(wq)## Sample Temperature Salinity DO
## 1 1 24.3 35.3 6.74
## 2 7 24.4 35.4 6.73
## 3 13 24.1 35.5 6.71
## 4 5 23.6 30.8 6.78
## 5 9 23.7 30.9 6.82
## 6 15 23.9 30.8 6.86
Tidy data
To make this data tidy, each variable needs to be a column. So, there needs to be one column for Plankton Group.
library(tidyr)
library(dplyr)
df <- df_raw %>%
tidyr::pivot_longer(
cols = "Chaetognath":"Snail",
names_to = "Plankton_Group",
values_to = "Count"
) %>%
dplyr::group_by(Station, Sample, Plankton_Group) %>%
dplyr::summarize(Total_Count = sum(Count))## `summarise()` has grouped output by 'Station', 'Sample'. You can override using
## the `.groups` argument.
df$Plankton_Group <- as.factor(df$Plankton_Group)
head(df)## # A tibble: 6 × 4
## # Groups: Station, Sample [1]
## Station Sample Plankton_Group Total_Count
## <chr> <int> <fct> <int>
## 1 MID 5 Amphipods 0
## 2 MID 5 Brittle star 0
## 3 MID 5 Chaetognath 246
## 4 MID 5 Cnidarian Larvae 0
## 5 MID 5 Cope. Nauplii 25
## 6 MID 5 Copepod 235
Part A
A bar graph with the results of a means test of the abundance of each of the plankton groups
ggdynamiteplot(df, Plankton_Group, Total_Count, error = "se", width = 0.25) +
labs(x = "Plankton Group", y = "Count") +
theme_manuscript(font = "Times", base_size = 12) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))shapiro.test(df$Total_Count)##
## Shapiro-Wilk normality test
##
## data: df$Total_Count
## W = 0.28231, p-value < 2.2e-16
kruskal.test(Total_Count ~ Plankton_Group, data = df)##
## Kruskal-Wallis rank sum test
##
## data: Total_Count by Plankton_Group
## Kruskal-Wallis chi-squared = 149.31, df = 23, p-value < 2.2e-16
# Perform post-hoc Dunn's test
library(FSA)
library(rcompanion)
Phocdunn <- dunnTest(Total_Count ~ Plankton_Group, data = df, method="bonferroni")
Phocdunns <- Phocdunn$res
cld <- cldList(comparison = Phocdunns$Comparison,
p.value = Phocdunns$P.adj,
threshold = 0.05)[1:2]
names(cld)[1]<-"Plankton_Group" # change the name of grouping factor according to the dataset (df)
cld## Plankton_Group Letter
## 1 Amphipods a
## 2 Brittlestar ab
## 3 Chaetognath abcd
## 4 CnidarianLarvae ab
## 5 Cope.Nauplii abcd
## 6 Copepod c
## 7 CrabMegalops abcd
## 8 CrabZoea cd
## 9 Eggs cd
## 10 Fishlarvae abd
## 11 Isopod abd
## 12 Jelly abd
## 13 Laravcean ab
## 14 Lucifer bcd
## 15 Mysid bcd
## 16 Ostrocod abd
## 17 Polychaetelarvae abd
## 18 Sacphopod a
## 19 Salp a
## 20 SegmentedWormLarvae ab
## 21 Snail ab
## 22 UnknownCephalopodLarvae a
## 23 UnknownCrustcean ab
## 24 UnknownGastropodLarvae abd
Part B
A bar graph with the results of a means test of species richness by “station”
species_richness <- df %>%
dplyr::group_by(Station, Sample) %>%
dplyr::filter(Total_Count != 0) %>%
dplyr::summarize(Species_count = n_distinct(Plankton_Group))## `summarise()` has grouped output by 'Station'. You can override using the
## `.groups` argument.
head(species_richness)## # A tibble: 6 × 3
## # Groups: Station [2]
## Station Sample Species_count
## <chr> <int> <int>
## 1 MID 5 10
## 2 MID 9 12
## 3 MID 15 14
## 4 OUT 1 8
## 5 OUT 7 8
## 6 OUT 13 8
ggdynamiteplot(species_richness, Station, Species_count, error = "se") +
labs(x = "Station", y = "Species Richness") +
theme_manuscript(font = "Times")shapiro.test(species_richness$Species_count)##
## Shapiro-Wilk normality test
##
## data: species_richness$Species_count
## W = 0.89577, p-value = 0.2285
library(agricolae)
model <- aov(Species_count ~ Station, data = species_richness)
summary(model)## Df Sum Sq Mean Sq F value Pr(>F)
## Station 2 24 12.000 7.2 0.0254 *
## Residuals 6 10 1.667
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tukey_result <- HSD.test(model, trt = "Station")
print(tukey_result$groups)## Species_count groups
## MID 12 a
## WAI 10 ab
## OUT 8 b
Part C
A scatterplot graph with the results of a correlation test between abundance and salinity
df_C <- df %>%
dplyr::group_by(Station, Sample) %>%
dplyr::summarize(Total_abund = sum(Total_Count)) %>%
dplyr::left_join(y = wq, by = "Sample")## `summarise()` has grouped output by 'Station'. You can override using the
## `.groups` argument.
ggplot(
data = df_C,
mapping = aes(x = Salinity, y = Total_abund)
) +
geom_point() +
scale_y_continuous(limits = c(0,1200))+
scale_x_continuous(limits = c(30,36),
breaks = seq(30,36, by = 2),
expand = c(0, 0)) +
labs(x = "Salinity", y = "Abundance") +
theme_manuscript(font = "Times", base_size = 11)cor.test(df_C$Salinity, df_C$Total_abund, method = "spearman")##
## Spearman's rank correlation rho
##
## data: df_C$Salinity and df_C$Total_abund
## S = 50.711, p-value = 0.1035
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.5774109
Part D
A scatterplot graph with the results of a correlation test between species richness and salinity
df_D <- df %>%
dplyr::group_by(Station, Sample) %>%
dplyr::filter(Total_Count != 0) %>%
dplyr::summarize(Species_count = n_distinct(Plankton_Group)) %>%
dplyr::left_join(y = wq, by = "Sample")## `summarise()` has grouped output by 'Station'. You can override using the
## `.groups` argument.
ggplot(
data = df_D,
mapping = aes(x = Salinity, y = Species_count)
) +
geom_point() +
scale_x_continuous(limits = c(30,36),
breaks = seq(30,36, by = 2),
expand = c(0, 0)) +
labs(x = "Salinity", y = "Species Richness") +
theme_manuscript(font = "Times", base_size = 11)cor.test(df_D$Salinity, df_D$Species_count, method = "spearman")##
## Spearman's rank correlation rho
##
## data: df_D$Salinity and df_D$Species_count
## S = 216.94, p-value = 0.008444
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.8078103