library(tidyverse)
library(here)
library(readxl)
library(janitor)
library(cowplot)
library(ggfortify)
library(lme4)
library(rstatix)
library(ggplot2)
library(cowplot)
library(dplyr)
library(ggplot2)Dissertation Data Analysis
Load packages
Load data
filepath_data <- here("Data collection","Results.xlsx")
mydata <- read_excel(filepath_data,"Sheet2")
glimpse(mydata)Rows: 36
Columns: 7
$ Position <chr> "Above", "Above", "Above", "Below", "Below", "Below", "Abo…
$ Month <chr> "April", "April", "April", "April", "April", "April", "May…
$ pH <dbl> 6.8, 6.7, 6.7, 6.5, 6.5, 6.5, 6.8, 6.6, 6.6, 6.6, 6.6, 6.4…
$ Temperature <dbl> 11.2, 11.8, 11.3, 11.0, 10.9, 10.9, 11.7, 11.8, 11.8, 11.0…
$ TDS <dbl> 85, 88, 87, 85, 87, 87, 88, 87, 87, 83, 84, 86, 84, 87, 84…
$ DO <dbl> 17.1, 17.5, 17.3, 17.7, 17.8, 17.5, 16.5, 17.2, 16.3, 16.7…
$ Nitrates <dbl> 3.916, 3.570, 3.134, 2.635, 4.433, 3.887, 3.930, 4.179, 4.…
pH Analysis
Summarise
pH_summary <- mydata |>
group_by(Position) |>
summarise(n = n(), mean_pH = mean(pH), sd_pH = sd(pH), se_pH = sd(pH)/sqrt(n()))
pH_summary# A tibble: 2 × 5
Position n mean_pH sd_pH se_pH
<chr> <int> <dbl> <dbl> <dbl>
1 Above 18 6.65 0.195 0.0459
2 Below 18 6.49 0.108 0.0254
Plot
Bar Chart
pH_summary |>
ggplot(aes(x = Position, y = mean_pH)) +
geom_col() +
geom_errorbar(aes(ymin = mean_pH - se_pH, ymax= mean_pH + se_pH), width = 0.15) +
labs(x = "",
y = "mean pH") +
theme_cowplot()Box Plot
ggplot(mydata, aes(x = Position, y = pH)) +
geom_boxplot() +
geom_jitter(width = 0.1) +
labs(
x = "",
y = "pH",
title = "pH Above and Below Beaver Dam"
) +
cowplot::theme_cowplot() +
theme(
text = element_text(family = "Times New Roman"),
plot.title = element_text(hjust = 0.5, face = "bold", size = 15)
)Violin Plot
mydata |>
ggplot(aes(x = Position, y = pH, fill = Position)) +
geom_violin(
trim = FALSE,
alpha = 0.6
) +
scale_fill_manual(
values = c(
"Below" = "#00BFC4", # same blue as line graphs
"Above" = "red"
)
) +
labs(
x = "",
y = "pH",
title = "pH Above and Below Beaver Dam"
) +
theme_cowplot() +
theme(
legend.position = "none", # 👈 removes legend
text = element_text(family = "Times New Roman"),
plot.title = element_text(
hjust = 0.5,
face = "bold",
size = 15
)
)Violin with Box Plot
mydata |>
ggplot(aes(x = Position, y = pH)) +
geom_violin(
aes(fill = Position),
trim = FALSE,
alpha = 0.6
) +
geom_boxplot(
width = 0.15,
outlier.shape = NA,
fill = "white",
colour = "black"
) +
scale_fill_manual(
values = c(
"Above" = "red",
"Below" = "#00BFC4"
)
) +
labs(
x = "",
y = "pH",
title = "pH Above and Below Beaver Dam"
) +
theme_cowplot() +
theme(
text = element_text(family = "Times New Roman"),
plot.title = element_text(
hjust = 0.5,
face = "bold",
size = 15
),
legend.position = "none"
)Analysis
Shapiro-Wilk Test
mydata |>
group_by(Position) |>
shapiro_test(pH)# A tibble: 2 × 4
Position variable statistic p
<chr> <chr> <dbl> <dbl>
1 Above pH 0.909 0.0840
2 Below pH 0.843 0.00650
Wilcox-test
mydata |>
wilcox_test(pH~Position)# A tibble: 1 × 7
.y. group1 group2 n1 n2 statistic p
* <chr> <chr> <chr> <int> <int> <dbl> <dbl>
1 pH Above Below 18 18 252. 0.00359
Mixed effects analysis
me_pH <- lmer(pH ~ Position + (1|Month), data = mydata)
summary(me_pH)Linear mixed model fit by REML ['lmerMod']
Formula: pH ~ Position + (1 | Month)
Data: mydata
REML criterion at convergence: -34.2
Scaled residuals:
Min 1Q Median 3Q Max
-1.5237 -0.6473 -0.1371 0.2466 3.8724
Random effects:
Groups Name Variance Std.Dev.
Month (Intercept) 0.01254 0.1120
Residual 0.01373 0.1172
Number of obs: 36, groups: Month, 6
Fixed effects:
Estimate Std. Error t value
(Intercept) 6.65000 0.05340 124.523
PositionBelow -0.16111 0.03905 -4.125
Correlation of Fixed Effects:
(Intr)
PositionBlw -0.366
Temperature Analysis
Summarise
Temperature_summary <- mydata |>
group_by(Position) |>
summarise(n = n(), mean_Temperature = mean(Temperature), sd_Temperature = sd(Temperature), se_Temperature = sd(Temperature)/sqrt(n()))
Temperature_summary# A tibble: 2 × 5
Position n mean_Temperature sd_Temperature se_Temperature
<chr> <int> <dbl> <dbl> <dbl>
1 Above 18 12.8 0.931 0.220
2 Below 18 12.3 1.02 0.241
Plot
Bar Chart
Temperature_summary |>
ggplot(aes(x = Position, y = mean_Temperature)) +
geom_col() +
geom_errorbar(aes(ymin = mean_Temperature - se_Temperature, ymax= mean_Temperature + se_Temperature), width = 0.15) +
labs(x = "",
y = "mean Temperature") +
theme_cowplot()Box Plot
mydata |>
ggplot(aes(x = Position, y =Temperature)) +
geom_boxplot() +
geom_jitter() +
labs(x = "",
y = "Temperature (ºC)",
title="Temperature Above and Below Beaver Dam") +
theme_cowplot() +
theme (
text = element_text(family = "Times New Roman"),
plot.title = element_text(
size = 15,
hjust = 0.5,
face = "bold"
)
)Violin Plot
mydata |>
ggplot(aes(x = Position, y = Temperature, fill = Position)) +
geom_violin(
trim = FALSE,
alpha = 0.6
) +
scale_fill_manual(
values = c(
"Below" = "#00BFC4", # same blue as line graphs
"Above" = "red"
)
) +
labs(
x = "",
y = "Temperature (ºC)",
title = "Temperature Above and Below Beaver Dam"
) +
theme_cowplot() +
theme(
legend.position = "none", # 👈 removes legend
text = element_text(family = "Times New Roman"),
plot.title = element_text(
hjust = 0.5,
face = "bold",
size = 15
)
)Violin with Box Plot
mydata |>
ggplot(aes(x = Position, y = Temperature)) +
geom_violin(
aes(fill = Position),
trim = FALSE,
alpha = 0.6
) +
geom_boxplot(
width = 0.15,
outlier.shape = NA,
fill = "white",
colour = "black"
) +
scale_fill_manual(
values = c(
"Above" = "red",
"Below" = "#00BFC4"
)
) +
labs(
x = "",
y = "Temperature (ºC)",
title = "Temperature Above and Below Beaver Dam"
) +
theme_cowplot() +
theme(
text = element_text(family = "Times New Roman"),
plot.title = element_text(
hjust = 0.5,
face = "bold",
size = 15
),
legend.position = "none"
)Analysis
Shapiro-Wilk Test
mydata |>
group_by(Position) |>
shapiro_test(Temperature)# A tibble: 2 × 4
Position variable statistic p
<chr> <chr> <dbl> <dbl>
1 Above Temperature 0.752 0.000342
2 Below Temperature 0.750 0.000328
Wilcox-test
mydata |>
wilcox_test(Temperature~Position)# A tibble: 1 × 7
.y. group1 group2 n1 n2 statistic p
* <chr> <chr> <chr> <int> <int> <dbl> <dbl>
1 Temperature Above Below 18 18 245 0.00871
TDS Analysis
Summarise
TDS_summary <- mydata |>
group_by(Position) |>
summarise(n = n(), mean_TDS= mean(TDS), sd_TDS = sd(TDS), se_TDS = sd(TDS)/sqrt(n()))
TDS_summary# A tibble: 2 × 5
Position n mean_TDS sd_TDS se_TDS
<chr> <int> <dbl> <dbl> <dbl>
1 Above 18 87.8 2.29 0.540
2 Below 18 85.8 2.26 0.532
Plot
Bar Chart
TDS_summary |>
ggplot(aes(x = Position, y = mean_TDS)) +
geom_col() +
geom_errorbar(aes(ymin = mean_TDS - se_TDS, ymax= mean_TDS + se_TDS), width = 0.15) +
labs(x = "",
y = "mean TDS") +
theme_cowplot()Box Plot
mydata |>
ggplot(aes(x = Position, y =TDS)) +
geom_boxplot() +
geom_jitter() +
labs(x = "",
y = "TDS (ppm)",
title ="TDS Above and Below Beaver Dam") +
theme_cowplot() +
theme (
text = element_text(family = "Times New Roman"),
plot.title = element_text(
size = 15,
hjust = 0.5,
face = "bold"
)
)Violin Plot
mydata |>
ggplot(aes(x = Position, y = TDS, fill = Position)) +
geom_violin(
trim = FALSE,
alpha = 0.6
) +
scale_fill_manual(
values = c(
"Below" = "#00BFC4", # same blue as line graphs
"Above" = "red"
)
) +
labs(
x = "",
y = "TDS (ppm)",
title = "TDS Above and Below Beaver Dam"
) +
theme_cowplot() +
theme(
legend.position = "none", # 👈 removes legend
text = element_text(family = "Times New Roman"),
plot.title = element_text(
hjust = 0.5,
face = "bold",
size = 15
)
)Violin with Box Plot
mydata |>
ggplot(aes(x = Position, y = TDS)) +
geom_violin(
aes(fill = Position),
trim = FALSE,
alpha = 0.6
) +
geom_boxplot(
width = 0.15,
outlier.shape = NA,
fill = "white",
colour = "black"
) +
scale_fill_manual(
values = c(
"Above" = "red",
"Below" = "#00BFC4"
)
) +
labs(
x = "",
y = "TDS (ppm) ",
title = "TDS Above and Below Beaver Dam"
) +
theme_cowplot() +
theme(
text = element_text(family = "Times New Roman"),
plot.title = element_text(
hjust = 0.5,
face = "bold",
size = 15
),
legend.position = "none"
)Analysis
Shapiro-Wilk Test
mydata |>
group_by(Position) |>
shapiro_test(TDS)# A tibble: 2 × 4
Position variable statistic p
<chr> <chr> <dbl> <dbl>
1 Above TDS 0.916 0.110
2 Below TDS 0.954 0.492
t-test
mydata |>
t_test(TDS~Position)# A tibble: 1 × 8
.y. group1 group2 n1 n2 statistic df p
* <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
1 TDS Above Below 18 18 2.57 34.0 0.0148
TON Analysis
Summarise
Nitrates_summary <- mydata |>
group_by(Position) |>
summarise(n = n(), mean_Nitrates= mean(Nitrates), sd_Nitrates = sd(Nitrates), se_Nitrates = sd(Nitrates)/sqrt(n()))
Nitrates_summary# A tibble: 2 × 5
Position n mean_Nitrates sd_Nitrates se_Nitrates
<chr> <int> <dbl> <dbl> <dbl>
1 Above 18 4.09 0.347 0.0819
2 Below 18 4.07 0.516 0.122
Plot
Bar Chart
Nitrates_summary |>
ggplot(aes(x = Position, y = mean_Nitrates)) +
geom_col() +
geom_errorbar(aes(ymin = mean_Nitrates - se_Nitrates, ymax= mean_Nitrates + se_Nitrates), width = 0.15) +
labs(x = "",
y = "mean Nitrates") +
theme_cowplot()Box Plot
mydata |>
ggplot(aes(x = Position, y =Nitrates)) +
geom_boxplot() +
geom_jitter() +
labs(x = "",
y = "TON (mg/l)",
title ="TON Above and Below Beaver Dam") +
theme_cowplot() +
theme (
text = element_text(family = "Times New Roman"),
plot.title = element_text(
size = 15,
hjust = 0.5,
face = "bold",
)
)Violin Plot
mydata |>
ggplot(aes(x = Position, y = Nitrates, fill = Position)) +
geom_violin(
trim = FALSE,
alpha = 0.6
) +
scale_fill_manual(
values = c(
"Below" = "#00BFC4", # same blue as line graphs
"Above" = "red"
)
) +
labs(
x = "",
y = "TON (mg/l)",
title = "TON Above and Below Beaver Dam"
) +
theme_cowplot() +
theme(
legend.position = "none", # 👈 removes legend
text = element_text(family = "Times New Roman"),
plot.title = element_text(
hjust = 0.5,
face = "bold",
size = 15
)
)Violin with Box Plot
mydata |>
ggplot(aes(x = Position, y = Nitrates)) +
geom_violin(
aes(fill = Position),
trim = FALSE,
alpha = 0.6
) +
geom_boxplot(
width = 0.15,
outlier.shape = NA,
fill = "white",
colour = "black"
) +
scale_fill_manual(
values = c(
"Above" = "red",
"Below" = "#00BFC4"
)
) +
labs(
x = "",
y = "TON mg/l",
title = "TON Above and Below Beaver Dam"
) +
theme_cowplot() +
theme(
text = element_text(family = "Times New Roman"),
plot.title = element_text(
hjust = 0.5,
face = "bold",
size = 15
),
legend.position = "none"
)Analysis
Shapiro-Wilk Test
mydata |>
group_by(Position) |>
shapiro_test(Nitrates)# A tibble: 2 × 4
Position variable statistic p
<chr> <chr> <dbl> <dbl>
1 Above Nitrates 0.883 0.0298
2 Below Nitrates 0.878 0.0244
Wilcox-test
mydata |>
wilcox_test(Nitrates~Position)# A tibble: 1 × 7
.y. group1 group2 n1 n2 statistic p
* <chr> <chr> <chr> <int> <int> <dbl> <dbl>
1 Nitrates Above Below 18 18 150. 0.728
DO Analysis
Summarise
DO_summary <- mydata |>
group_by(Position) |>
summarise(n = n(), mean_DO= mean(DO), sd_DO = sd(DO), se_DO = sd(DO)/sqrt(n()))
DO_summary# A tibble: 2 × 5
Position n mean_DO sd_DO se_DO
<chr> <int> <dbl> <dbl> <dbl>
1 Above 18 16.6 0.447 0.105
2 Below 18 17.0 0.416 0.0981
Plot
Bar Chart
DO_summary |>
ggplot(aes(x = Position, y = mean_DO)) +
geom_col() +
geom_errorbar(aes(ymin = mean_DO - se_DO, ymax= mean_DO + se_DO), width = 0.15) +
labs(x = "",
y = "mean DO") +
theme_cowplot()Box Plot
mydata |>
ggplot(aes(x = Position, y =DO)) +
geom_boxplot() +
geom_jitter() +
labs(x = "",
y = "Dissolved Oxygen (%)",
title ="Dissolved Oxygen Above and Below Beaver Dam") +
theme_cowplot() +
theme (
text = element_text(family = "Times New Roman"),
plot.title = element_text(
size = 15,
hjust = 0.5,
face = "bold"
)
)Violin Plot
mydata |>
ggplot(aes(x = Position, y = DO, fill = Position)) +
geom_violin(
trim = FALSE,
alpha = 0.6
) +
scale_fill_manual(
values = c(
"Below" = "#00BFC4", # same blue as line graphs
"Above" = "red"
)
) +
labs(
x = "",
y = "DO (%)",
title = "DO Above and Below Beaver Dam"
) +
theme_cowplot() +
theme(
legend.position = "none", # 👈 removes legend
text = element_text(family = "Times New Roman"),
plot.title = element_text(
hjust = 0.5,
face = "bold",
size = 15
)
)Violin with Box Plot
mydata |>
ggplot(aes(x = Position, y = DO)) +
geom_violin(
aes(fill = Position),
trim = FALSE,
alpha = 0.6
) +
geom_boxplot(
width = 0.15,
outlier.shape = NA,
fill = "white",
colour = "black"
) +
scale_fill_manual(
values = c(
"Above" = "red",
"Below" = "#00BFC4"
)
) +
labs(
x = "",
y = "DO (%)",
title = "DO Above and Below Beaver Dam"
) +
theme_cowplot() +
theme(
text = element_text(family = "Times New Roman"),
plot.title = element_text(
hjust = 0.5,
face = "bold",
size = 15
),
legend.position = "none"
)Analysis
Shapiro-Wilk Test
mydata |>
group_by(Position) |>
shapiro_test(DO)# A tibble: 2 × 4
Position variable statistic p
<chr> <chr> <dbl> <dbl>
1 Above DO 0.913 0.0953
2 Below DO 0.859 0.0119
Wilcox-test
mydata |>
wilcox_test(DO~Position)# A tibble: 1 × 7
.y. group1 group2 n1 n2 statistic p
* <chr> <chr> <chr> <int> <int> <dbl> <dbl>
1 DO Above Below 18 18 66.5 0.00254
Checking for Normality
Change parameter accordingly
hist(mydata$pH,
main = "Histogram of pH",
xlab = "pH",
col = "lightblue",
border = "black")Line graphs
Ensure month order
mydata$Month <- factor(mydata$Month,
levels = c("April", "May", "June", "July", "August", "September"))Summarise
summary_mydata <- mydata %>%
group_by(Month, Position) %>%
summarise(
pH_mean = mean(pH, na.rm = TRUE),
pH_sd = sd(pH, na.rm = TRUE),
Temperature_mean = mean(Temperature, na.rm = TRUE),
Temperature_sd = sd(Temperature, na.rm = TRUE),
TDS_mean = mean(TDS, na.rm = TRUE),
TDS_sd = sd(TDS, na.rm = TRUE),
DO_mean = mean(DO, na.rm = TRUE),
DO_sd = sd(DO, na.rm = TRUE),
Nitrates_mean = mean(Nitrates, na.rm = TRUE),
Nitrates_sd = sd(Nitrates, na.rm = TRUE)
) %>%
ungroup()pH
ggplot(summary_mydata, aes(x = Month, y = pH_mean, color = Position, group = Position)) +
geom_line(size = 1.2) +
geom_point(size = 3) +
labs(title = "Average pH Over Months", y = "DO (%)") +
theme_minimal()+
theme(
text = element_text(family = "Times New Roman"),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)With sd bars
ggplot(summary_mydata,
aes(x = Month, y = pH_mean, color = Position, group = Position)) +
geom_line(linewidth = 1.2) +
geom_point(size = 3) +
geom_errorbar(aes(ymin = pH_mean - pH_sd,
ymax = pH_mean + pH_sd),
width = 0.2,
linewidth = 0.5,
colour = "black") +
labs(title = "Average pH Over Months",
y = "pH") +
theme_minimal() +
theme(
text = element_text(family = "Times New Roman"),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)Temperature
ggplot(summary_mydata, aes(x = Month, y = Temperature_mean, color = Position, group = Position)) +
geom_line(size = 1.2) +
geom_point(size = 3) +
labs(title = "Average Temperature Over Months", y = "DO (%)") +
theme_minimal()+
theme(
text = element_text(family = "Times New Roman"),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)With sd bars
ggplot(summary_mydata,
aes(x = Month, y = Temperature_mean, color = Position, group = Position)) +
geom_line(linewidth = 1.2) +
geom_point(size = 3) +
geom_errorbar(aes(ymin = Temperature_mean - Temperature_sd,
ymax = Temperature_mean + Temperature_sd),
width = 0.2,
linewidth = 0.5,
colour = "black") +
labs(title = "Average Temperature Over Months",
y = "pH") +
theme_minimal() +
theme(
text = element_text(family = "Times New Roman"),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)TDS
ggplot(summary_mydata, aes(x = Month, y = TDS_mean, color = Position, group = Position)) +
geom_line(size = 1.2) +
geom_point(size = 3) +
labs(title = "Average TDS Over Months", y = "DO (%)") +
theme_minimal()+
theme(
text = element_text(family = "Times New Roman"),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)With sd bars
ggplot(summary_mydata,
aes(x = Month, y = TDS_mean, color = Position, group = Position)) +
geom_line(linewidth = 1.2) +
geom_point(size = 3) +
geom_errorbar(aes(ymin = TDS_mean - TDS_sd,
ymax = TDS_mean + TDS_sd),
width = 0.2,
linewidth = 0.5,
colour = "black") +
labs(title = "Average TDS Over Months",
y = "pH") +
theme_minimal() +
theme(
text = element_text(family = "Times New Roman"),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)TON
ggplot(summary_mydata, aes(x = Month, y = Nitrates_mean, color = Position, group = Position)) +
geom_line(size = 1.2) +
geom_point(size = 3) +
labs(title = "Average TON Over Months", y = "DO (%)") +
theme_minimal()+
theme(
text = element_text(family = "Times New Roman"),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)With sd bars
ggplot(summary_mydata,
aes(x = Month, y = Nitrates_mean, color = Position, group = Position)) +
geom_line(linewidth = 1.2) +
geom_point(size = 3) +
geom_errorbar(aes(ymin = Nitrates_mean - Nitrates_sd,
ymax = Nitrates_mean + Nitrates_sd),
width = 0.2,
linewidth = 0.5,
colour = "black") +
labs(title = "Average TON Over Months",
y = "pH") +
theme_minimal() +
theme(
text = element_text(family = "Times New Roman"),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)DO
ggplot(summary_mydata, aes(x = Month, y = DO_mean, color = Position, group = Position)) +
geom_line(size = 1.2) +
geom_point(size = 3) +
labs(title = "Average DO Over Months", y = "DO (%)") +
theme_minimal()+
theme(
text = element_text(family = "Times New Roman"),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)With sd bars
ggplot(summary_mydata,
aes(x = Month, y = DO_mean, color = Position, group = Position)) +
geom_line(linewidth = 1.2) +
geom_point(size = 3) +
geom_errorbar(aes(ymin = DO_mean - DO_sd,
ymax = DO_mean + DO_sd),
width = 0.2,
linewidth = 0.5,
colour = "black") +
labs(title = "Average DO Over Months",
y = "pH") +
theme_minimal() +
theme(
text = element_text(family = "Times New Roman"),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)