library(vegan)
## Loading required package: permute
## Loading required package: lattice
library(ggpubr)
## Loading required package: ggplot2
library(ggplot2)
Here we are going to show and calculate the species distribution
library(readxl)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
par(mfrow = c(1,1))
setwd("~/Luna")
df <- read_excel("Luna.xlsx", sheet = "Sheet22")
df_count <- df
speciescount <- ggplot(df_count, aes(x = amount, y = reorder(scientific, amount))) +
geom_bar(stat = "identity", fill = "steelblue") +
labs(x = "Number of abundance", y = "", title = "Diversity (Speciesand abundance of Odonata)") +
theme_minimal() +
theme(axis.text.y = element_text(face = "italic"))
speciescount
Above here we see the distribution of all Odonata species found during our experiment. On the y-axis of this barplot we see the scientific name of the species, and on the x-axis the amount of times this species was found, Note that the y-axis is sorted based on abundance.
And now the smae for the family:
df <- read_excel("Luna.xlsx", sheet = "Data export")
df_count <- df %>%
count(family, name = "count")
familycount <- ggplot(df_count, aes(x = count, y = reorder(family, count))) +
geom_bar(stat = "identity", fill = "steelblue") +
labs(x = "Number of abundance", y = "Family name", title = "Distribution of Families") +
theme_minimal() +
theme(axis.text.y = element_text(face = "italic"))
familycount
The description is the same as the one in the figure above.
Next up, we will calculate the species evenness metrics. Using the Shannon-Wiener index, as well as the Simpson index and it’s reverse form.
library(dplyr)
library(dplyr)
data <- data.frame(
species = c("Aeshna cyanea", "Aeshna grandis", "Aeshna isoceles", "Aeshna mixta", "Anax imperator", "Anax parthenope",
"Calopteryx splendens", "Calopteryx virgo", "Chalcolestes viridis", "Coenagrion puella", "Coenagrion pulchellum",
"Coenagrion scitulum", "Enallagma cyathigerum", "Erythromma viridulum", "Gomphus pulchellus", "Ischnura elegans",
"Libellula depressa", "Libellula quadrimaculata", "Orthetrum cancellatum", "Orthetrum coerulescens",
"Platycnemis pennipes", "Pyrrhosoma nymphula", "Sympetrum sanguineum", "Sympetrum striolatum", "Sympetrum vulgatum"),
count = c(7, 18, 2, 38, 46, 11, 8, 25, 13, 132, 8, 1, 192, 13, 7, 131, 20, 7, 60, 122, 40, 23, 88, 5, 5)
)
total <- sum(data$count)
p <- data$count / total
shannon_index <- -sum(p * log(p))
simpson_index <- sum(p^2)
inverse_simpson <- 1 / simpson_index
shannon_index
## [1] 2.595379
simpson_index
## [1] 0.1010106
inverse_simpson
## [1] 9.899947
data <- data.frame(index = c("Shannon", "Simpson", "InverseSimpson"), values = c(2.593792, 0.1010106, 9.899947))
data
## index values
## 1 Shannon 2.5937920
## 2 Simpson 0.1010106
## 3 InverseSimpson 9.8999470
This leads us to this data table, where we can see a very low Simpson index of about 0.10. Indicating little diversity in our species range. A Shannon index of about 2.6 doenn’t give a terrible evenness, but not great either. This probably means that at least a couple of species are dominating the sample pool.
library(ggplot2)
library(vegan)
# Sample data
sample_moment <- c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
cumulative_species <- c(0, 11, 15, 18, 19, 22, 22, 23, 23, 23, 23, 25, 25, 25, 25, 25)
species_matrix <- matrix(0, nrow = length(sample_moment), ncol = max(cumulative_species))
for (i in 1:length(sample_moment)) {
species_matrix[i, 1:cumulative_species[i]] <- 1
}
accum_model <- specaccum(species_matrix, method = "random")
future_moments <- 1:21
predicted_species <- accum_model$richness
predicted_species <- c(predicted_species, rep(predicted_species[length(predicted_species)], length(future_moments) - length(predicted_species)))
plot_data <- data.frame(Sample_Moment = sample_moment, Cumulative_Species = cumulative_species)
future_data <- data.frame(Sample_Moment = future_moments, Predicted = predicted_species)
ggplot() +
geom_line(data = future_data, aes(x = Sample_Moment, y = Predicted), color = "green", linetype = "dashed", size = 1) +
geom_line(data = plot_data, aes(x = Sample_Moment, y = Cumulative_Species), color = "hotpink", size = 1) +
geom_point(data = plot_data, aes(x = Sample_Moment, y = Cumulative_Species), color = "coral", size = 2) +
labs(
title = "Rarefaction Curve with Predicted Species Accumulation",
x = "Sampling Effort (Moment)",
y = "Cumulative Species Found"
) +
theme_minimal()
Here we get our rarefraction curve, with a theoretical predicted line
(green) based on our species found, showing an exponential growth of
species. It looks like an increase in sampling effort wouldn’t lead to
many new species. Although in this province we have a rich Odonata
fauna. Coming to 58-59 species in the last 3 years (waarnemingen.be)
library(ggplot2)
library(vegan)
sample_moment <- c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
cumulative_species <- c(0, 11, 15, 18, 19, 22, 22, 23, 23, 23, 23, 25, 25, 25, 25, 25)
loess_model <- loess(cumulative_species ~ sample_moment)
correlation <- cor(sample_moment, cumulative_species)
lm_model <- lm(cumulative_species ~ sample_moment)
p_value <- summary(lm_model)$coefficients[2, 4]
ggplot(data = data.frame(Sample_Moment = sample_moment, Cumulative_Species = cumulative_species), aes(x = Sample_Moment, y = Cumulative_Species)) +
geom_point(color = "coral", size = 2) +
geom_smooth(method = "loess", color = "hotpink", linetype = "solid", size = 1, se = TRUE) +
labs(
title = "Rarefaction Curve with LOESS Fit",
x = "Sampling Effort (Moment)",
y = "Cumulative Species Found"
) +
annotate("text", x = 10, y = 17, label = paste("Pearson r =", round(correlation, 2)), color = "black", size = 5) +
annotate("text", x = 10, y = 15, label = paste("p-value =", round(p_value, 4)), color = "black", size = 5) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
Here we fitted the same data, but we included Pearson correlation and a p-value. We see a high correlation in our LOESS-fitted line, indicating a pattern of exponential increase of our data. The p-value shows the significance of this fit. (But I think since we fitted this regression ourselves we can ignore this number, even though it is significant.)
Now we do an ANOVA with our environmental variables to see the effect on species
library(readxl)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(ggplot2)
data <- read_excel("ANOVA-time.xlsx")
head(data)
## # A tibble: 6 × 7
## TemperatureMean CloudCover Wind WindDirection Humidity Pressure Species
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 17.5 50 20 180 52 1005 11
## 2 21.5 33 18 270 66 1015 15
## 3 19 50 18 270 71 1012 13
## 4 26 10 6 180 45 1021 11
## 5 24.5 60 11 270 50 1013 16
## 6 24.5 0 4 0 65 1012 11
full_model <- lm(Species ~ TemperatureMean + CloudCover + Wind + WindDirection + Humidity + Pressure, data = data)
vif_values <- vif(full_model)
print(vif_values)
## TemperatureMean CloudCover Wind WindDirection Humidity
## 1.844767 2.283599 1.791559 2.782105 3.345319
## Pressure
## 2.908007
null_model <- lm(Species ~ 1, data = data)
forward_model <- step(null_model, direction = "forward", scope = formula(full_model))
## Start: AIC=45.67
## Species ~ 1
##
## Df Sum of Sq RSS AIC
## + TemperatureMean 1 146.144 129.59 36.345
## + Humidity 1 112.802 162.93 39.779
## + Pressure 1 65.205 210.53 43.624
## + WindDirection 1 55.264 220.47 44.316
## <none> 275.73 45.671
## + CloudCover 1 19.778 255.96 46.554
## + Wind 1 5.613 270.12 47.362
##
## Step: AIC=36.34
## Species ~ TemperatureMean
##
## Df Sum of Sq RSS AIC
## + Wind 1 30.2322 99.357 34.360
## + Humidity 1 16.3254 113.264 36.325
## <none> 129.589 36.345
## + WindDirection 1 16.1713 113.418 36.345
## + Pressure 1 7.3461 122.243 37.469
## + CloudCover 1 1.6536 127.935 38.152
##
## Step: AIC=34.36
## Species ~ TemperatureMean + Wind
##
## Df Sum of Sq RSS AIC
## + Pressure 1 30.3995 68.957 30.882
## + Humidity 1 12.7124 86.645 34.306
## <none> 99.357 34.360
## + WindDirection 1 8.0253 91.332 35.097
## + CloudCover 1 2.1478 97.209 36.032
##
## Step: AIC=30.88
## Species ~ TemperatureMean + Wind + Pressure
##
## Df Sum of Sq RSS AIC
## <none> 68.957 30.882
## + CloudCover 1 2.11125 66.846 32.415
## + Humidity 1 1.41633 67.541 32.570
## + WindDirection 1 0.16539 68.792 32.846
summary(forward_model)
##
## Call:
## lm(formula = Species ~ TemperatureMean + Wind + Pressure, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.6128 -0.5317 0.1811 0.7658 5.1689
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -270.9968 118.1629 -2.293 0.04252 *
## TemperatureMean 0.7214 0.1881 3.836 0.00276 **
## Wind 0.4403 0.1510 2.915 0.01405 *
## Pressure 0.2585 0.1174 2.202 0.04990 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.504 on 11 degrees of freedom
## Multiple R-squared: 0.7499, Adjusted R-squared: 0.6817
## F-statistic: 10.99 on 3 and 11 DF, p-value: 0.001226
backward_model <- step(full_model, direction = "backward")
## Start: AIC=35.25
## Species ~ TemperatureMean + CloudCover + Wind + WindDirection +
## Humidity + Pressure
##
## Df Sum of Sq RSS AIC
## - WindDirection 1 2.987 64.850 33.960
## - CloudCover 1 3.736 65.600 34.133
## - Humidity 1 4.735 66.598 34.359
## <none> 61.864 35.253
## - Pressure 1 24.465 86.328 38.252
## - Wind 1 35.653 97.516 40.080
## - TemperatureMean 1 52.504 114.368 42.471
##
## Step: AIC=33.96
## Species ~ TemperatureMean + CloudCover + Wind + Humidity + Pressure
##
## Df Sum of Sq RSS AIC
## - Humidity 1 1.996 66.846 32.415
## - CloudCover 1 2.691 67.541 32.570
## <none> 64.850 33.960
## - Pressure 1 21.713 86.563 36.292
## - Wind 1 32.784 97.635 38.098
## - TemperatureMean 1 61.270 126.120 41.938
##
## Step: AIC=32.42
## Species ~ TemperatureMean + CloudCover + Wind + Pressure
##
## Df Sum of Sq RSS AIC
## - CloudCover 1 2.111 68.957 30.882
## <none> 66.846 32.415
## - Pressure 1 30.363 97.209 36.032
## - Wind 1 42.481 109.328 37.794
## - TemperatureMean 1 93.870 160.716 43.574
##
## Step: AIC=30.88
## Species ~ TemperatureMean + Wind + Pressure
##
## Df Sum of Sq RSS AIC
## <none> 68.957 30.882
## - Pressure 1 30.400 99.357 34.360
## - Wind 1 53.286 122.243 37.469
## - TemperatureMean 1 92.259 161.217 41.620
summary(backward_model)
##
## Call:
## lm(formula = Species ~ TemperatureMean + Wind + Pressure, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.6128 -0.5317 0.1811 0.7658 5.1689
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -270.9968 118.1629 -2.293 0.04252 *
## TemperatureMean 0.7214 0.1881 3.836 0.00276 **
## Wind 0.4403 0.1510 2.915 0.01405 *
## Pressure 0.2585 0.1174 2.202 0.04990 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.504 on 11 degrees of freedom
## Multiple R-squared: 0.7499, Adjusted R-squared: 0.6817
## F-statistic: 10.99 on 3 and 11 DF, p-value: 0.001226
Which results in a model, forward and backwards, that suggest that the mean temperature of the afternoon, wind, and air pressure are contributing the most to the species found. Wind speed, humidity, and clour cover dont seem to do much. The last one is suprising to me!
Now for abundancy
data <- read_excel("ANOVA-time2.xlsx")
full_model <- lm(Abundancy ~ TemperatureMean + CloudCover + Wind + WindDirection + Humidity + Pressure, data = data)
vif_values <- vif(full_model)
print(vif_values)
## TemperatureMean CloudCover Wind WindDirection Humidity
## 1.844767 2.283599 1.791559 2.782105 3.345319
## Pressure
## 2.908007
null_model <- lm(Abundancy ~ 1, data = data)
forward_model <- step(null_model, direction = "forward", scope = formula(full_model))
## Start: AIC=114.29
## Abundancy ~ 1
##
## Df Sum of Sq RSS AIC
## + TemperatureMean 1 11908.0 14844 107.46
## + Humidity 1 9475.4 17276 109.74
## + CloudCover 1 6354.7 20397 112.23
## + Pressure 1 3469.3 23283 114.21
## <none> 26752 114.30
## + WindDirection 1 3203.4 23548 114.38
## + Wind 1 354.4 26397 116.09
##
## Step: AIC=107.46
## Abundancy ~ TemperatureMean
##
## Df Sum of Sq RSS AIC
## <none> 14844 107.46
## + Humidity 1 1469.75 13374 107.89
## + CloudCover 1 1090.13 13754 108.31
## + WindDirection 1 639.92 14204 108.80
## + Pressure 1 74.23 14770 109.38
## + Wind 1 66.54 14777 109.39
summary(forward_model)
##
## Call:
## lm(formula = Abundancy ~ TemperatureMean, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -66.484 -19.712 2.362 18.748 66.671
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -75.525 45.332 -1.666 0.11961
## TemperatureMean 7.231 2.239 3.229 0.00658 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 33.79 on 13 degrees of freedom
## Multiple R-squared: 0.4451, Adjusted R-squared: 0.4024
## F-statistic: 10.43 on 1 and 13 DF, p-value: 0.006585
backward_model <- step(full_model, direction = "backward")
## Start: AIC=114.3
## Abundancy ~ TemperatureMean + CloudCover + Wind + WindDirection +
## Humidity + Pressure
##
## Df Sum of Sq RSS AIC
## - WindDirection 1 27.8 12054 112.34
## - Wind 1 121.7 12148 112.45
## - Pressure 1 362.1 12389 112.75
## - Humidity 1 423.3 12450 112.82
## - CloudCover 1 1303.0 13330 113.85
## <none> 12027 114.30
## - TemperatureMean 1 3216.0 15243 115.86
##
## Step: AIC=112.34
## Abundancy ~ TemperatureMean + CloudCover + Wind + Humidity +
## Pressure
##
## Df Sum of Sq RSS AIC
## - Wind 1 187.0 12242 110.57
## - Pressure 1 345.9 12400 110.76
## - Humidity 1 909.1 12964 111.43
## - CloudCover 1 1277.4 13332 111.85
## <none> 12054 112.34
## - TemperatureMean 1 3245.2 15300 113.91
##
## Step: AIC=110.57
## Abundancy ~ TemperatureMean + CloudCover + Humidity + Pressure
##
## Df Sum of Sq RSS AIC
## - Pressure 1 552.45 12794 109.23
## - CloudCover 1 1094.46 13336 109.85
## - Humidity 1 1326.60 13568 110.11
## <none> 12242 110.57
## - TemperatureMean 1 3105.83 15347 111.96
##
## Step: AIC=109.23
## Abundancy ~ TemperatureMean + CloudCover + Humidity
##
## Df Sum of Sq RSS AIC
## - CloudCover 1 580.07 13374 107.89
## - Humidity 1 959.68 13754 108.31
## <none> 12794 109.23
## - TemperatureMean 1 2944.47 15738 110.34
##
## Step: AIC=107.9
## Abundancy ~ TemperatureMean + Humidity
##
## Df Sum of Sq RSS AIC
## - Humidity 1 1469.8 14844 107.46
## <none> 13374 107.89
## - TemperatureMean 1 3902.3 17276 109.74
##
## Step: AIC=107.46
## Abundancy ~ TemperatureMean
##
## Df Sum of Sq RSS AIC
## <none> 14844 107.46
## - TemperatureMean 1 11908 26752 114.30
summary(backward_model)
##
## Call:
## lm(formula = Abundancy ~ TemperatureMean, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -66.484 -19.712 2.362 18.748 66.671
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -75.525 45.332 -1.666 0.11961
## TemperatureMean 7.231 2.239 3.229 0.00658 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 33.79 on 13 degrees of freedom
## Multiple R-squared: 0.4451, Adjusted R-squared: 0.4024
## F-statistic: 10.43 on 1 and 13 DF, p-value: 0.006585
With our abundancy we see that only mean temperature has an influence. For good measure, we will also fit the species-model-coefficients into our species model to see the results.
lmmm <- lm(formula = Abundancy ~ TemperatureMean + Wind + Pressure, data = data)
summary(lmmm)
##
## Call:
## lm(formula = Abundancy ~ TemperatureMean + Wind + Pressure, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -65.167 -18.005 4.328 18.764 66.911
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -708.0407 1719.4786 -0.412 0.6884
## TemperatureMean 6.9554 2.7366 2.542 0.0274 *
## Wind 0.7807 2.1974 0.355 0.7291
## Pressure 0.6206 1.7082 0.363 0.7232
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 36.43 on 11 degrees of freedom
## Multiple R-squared: 0.4542, Adjusted R-squared: 0.3053
## F-statistic: 3.051 on 3 and 11 DF, p-value: 0.07395
However, only the mean temperature stays significant, while the others are not significant, boasting p-values around 0.72
First we will load our dataset into R
# Load required libraries
library(ggplot2)
library(tidyr)
library(dplyr)
# Create the data frame (you can replace this with your actual data)
data <- data.frame(
Date = as.Date(c("2024-06-15", "2024-07-10", "2024-07-16", "2024-07-18", "2024-07-25",
"2024-08-02", "2024-08-10", "2024-08-19", "2024-08-23", "2024-09-08",
"2024-09-15", "2024-09-24", "2024-10-10", "2024-10-12", "2024-10-26")),
Aeshna_cyanea = c(0, 3, 0, 1, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0),
Aeshna_grandis = c(0, 0, 1, 1, 4, 0, 6, 4, 2, 2, 8, 0, 0, 5, 3),
Aeshna_isoceles = c(1, 5, 7, 3, 8, 5, 1, 4, 4, 1, 1, 3, 6, 6, 6),
Aeshna_mixta = c(0, 1, 0, 0, 4, 3, 2, 2, 4, 1, 0, 0, 0, 2, 0),
Anax_imperator = c(2, 1, 5, 3, 2, 0, 3, 1, 1, 3, 4, 6, 2, 0, 6),
Anax_parthenope = c(0, 9, 11, 6, 2, 18, 3, 7, 9, 16, 9, 0, 0, 3, 0),
Calopteryx_splendens = c(10, 13, 12, 10, 9, 26, 16, 14, 12, 24, 9, 0, 0, 3, 8),
Calopteryx_virgo = c(3, 5, 8, 4, 1, 15, 2, 7, 2, 2, 5, 6, 2, 3, 4),
Chalcolestes_viridis = c(4, 9, 5, 13, 21, 3, 37, 26, 26, 17, 2, 0, 0, 0, 3),
Coenagrion_puella = c(2, 5, 8, 3, 1, 6, 16, 23, 12, 5, 1, 3, 3, 1, 2),
Coenagrion_pulchellum = c(3, 9, 10, 3, 2, 31, 37, 14, 26, 17, 9, 0, 14, 9, 11),
Coenagrion_scitulum = c(2, 6, 3, 4, 18, 9, 8, 23, 18, 11, 1, 0, 6, 8, 4),
Enallagma_cyathigerum = c(0, 6, 5, 3, 2, 3, 3, 7, 26, 5, 1, 5, 3, 2, 4),
Erythromma_viridulum = c(2, 11, 10, 1, 0, 6, 7, 18, 3, 3, 1, 2, 7, 5, 0),
Gomphus_pulchellus = c(0, 6, 0, 1, 0, 9, 23, 3, 26, 5, 3, 0, 0, 0, 1),
Ischnura_elegans = c(1, 3, 5, 3, 18, 7, 2, 2, 7, 11, 10, 3, 8, 3, 1),
Libellula_depressa = c(2, 9, 3, 1, 4, 31, 23, 18, 26, 5, 1, 1, 0, 2, 3),
Libellula_quadrimaculata = c(1, 7, 4, 4, 21, 15, 37, 5, 26, 12, 10, 0, 1, 0, 4),
Orthetrum_cancellatum = c(3, 5, 10, 1, 2, 6, 7, 18, 12, 3, 2, 0, 4, 2, 4),
Orthetrum_coerulescens = c(4, 6, 3, 2, 7, 6, 23, 26, 2, 17, 1, 6, 8, 1, 11),
Platycnemis_pennipes = c(2, 2, 3, 3, 4, 9, 5, 18, 7, 5, 1, 2, 0, 1, 0),
Pyrrhosoma_nymphula = c(6, 11, 6, 6, 18, 31, 5, 23, 8, 3, 5, 0, 9, 0, 3),
Sympetrum_sanguineum = c(3, 5, 6, 13, 18, 6, 1, 2, 8, 11, 1, 0, 3, 4, 7),
Sympetrum_striolatum = c(2, 6, 7, 1, 7, 8, 6, 4, 3, 5, 0, 0, 1, 2, 2),
Sympetrum_vulgatum = c(0, 3, 0, 3, 7, 9, 4, 4, 2, 3, 1, 0, 0, 2, 4)
)
long_data <- data %>%
pivot_longer(cols = -Date, names_to = "Species", values_to = "Count")
Then we will create barplots for every species
# Create the bar plots for each species without the legend
ggplot(long_data, aes(x = Date, y = Count, fill = Species)) +
geom_bar(stat = "identity") +
facet_wrap(~Species, scales = "free_y", ncol = 5) +
labs(y = "Count", x = "Date") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none") # Remove the legend
Here with individual LOESS trendlines
# Create 25 species plots with trendlines for each species
ggplot(long_data, aes(x = Date, y = Count, fill = Species)) +
geom_bar(stat = "identity", position = "stack") + # Stacked bars for each species
geom_smooth(method = "loess", color = "blue", se = FALSE, aes(group = 1)) + # LOESS trendline
labs(y = "Count", x = "Date") +
facet_wrap(~Species, scales = "free_y", ncol = 5) + # 25 plots in a 5x5 grid
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none")
## `geom_smooth()` using formula = 'y ~ x'
As well as a lineplot for all species at once
# Create a single figure showing all species on one plot (Line plot) without legend
ggplot(long_data, aes(x = Date, y = Count, color = Species, group = Species)) +
geom_line() +
geom_point() +
labs(y = "Count", x = "Date") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none") # Remove the legend
And then we show a LOESS plot fitted to ALL species, to show the general trend of our abundancies
# Fit a trendline (LOESS) and plot it without individual species lines
ggplot(long_data, aes(x = Date, y = Count)) +
geom_smooth(method = "loess", color = "blue", se = FALSE) + # LOESS trendline without confidence interval shading
labs(y = "Count", x = "Date") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## `geom_smooth()` using formula = 'y ~ x'
Now instead of the species we will do the same for the families
library(readxl)
library(tidyr)
library(ggplot2)
data <- read_excel("Luna.xlsx", sheet = "Sheet5")
long_data <- pivot_longer(data, cols = -Family, names_to = "Date", values_to = "Count")
long_data$Family <- factor(long_data$Family, levels = c("Aeshnidae", "Calopterygidae", "Coenagrionidae",
"Lestidae", "Libellulidae", "Platycnemididae", "Gomphidae"))
ggplot(long_data, aes(x = Date, y = Count)) +
geom_bar(stat = "identity", position = "stack", aes(fill = Family)) +
labs(y = "Count", x = "Date") +
theme_minimal() +
facet_wrap(~Family, scales = "free_y", ncol = 2, nrow = 4) +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none",
strip.text = element_text(size = 12))
However this looks ugly , so here is Gomphidae seperately
data <- read_excel("Luna.xlsx", sheet = "Sheet6")
long_data <- pivot_longer(data, cols = -Family, names_to = "Date", values_to = "Count")
long_data$Family <- factor(long_data$Family, levels = c("Aeshnidae", "Calopterygidae", "Coenagrionidae", "Lestidae", "Libellulidae", "Platycnemididae"))
ggplot(long_data, aes(x = Date, y = Count)) +
geom_bar(stat = "identity", position = "stack", aes(fill = Family)) +
labs(y = "Count", x = "Date") +
theme_minimal() +
facet_wrap(~Family, scales = "free_y", ncol = 2, nrow = 4) +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none",
strip.text = element_text(size = 12))
data <- read_excel("Luna.xlsx", sheet = "Sheet7")
long_data <- pivot_longer(data, cols = -Family, names_to = "Date", values_to = "Count")
long_data$Family <- factor(long_data$Family, levels = "Gomphidae")
ggplot(long_data, aes(x = Date, y = Count)) +
geom_bar(stat = "identity", position = "stack", aes(fill = Family)) +
labs(y = "Count", x = "Date") +
ggtitle("Gomphidae") + # Add custom title
theme_minimal() +
facet_wrap(~Family, scales = "free_y", ncol = 2, nrow = 4) +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none",
strip.text = element_text(size = 0)) +
theme(plot.title = element_text(hjust = 0.5, color = "black")) +
scale_fill_manual(values = rep("hotpink", length(unique(long_data$Family))))
# Load libraries
library(ggplot2)
library(dplyr)
library(readr)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
df <- read_excel("C:/Users/Arne/Downloads/Waarnemingen libellen (1).xlsx", sheet = "Sheet10")
df_summary <- df %>%
group_by(name, year) %>%
summarise(coverage = sum(coverage), .groups = "drop")
# Get all unique species and years
all_species <- unique(df_summary$name)
all_years <- unique(df_summary$year)
# Create a complete grid of all combinations
complete_grid <- expand.grid(name = all_species, year = all_years)
# Join with actual data and fill NAs with 0
df_long <- complete_grid %>%
left_join(df_summary, by = c("name", "year")) %>%
mutate(coverage = replace_na(coverage, 0))
# View the long-format data
print(df_long)
## name year coverage
## 1 Alnus glutinosa 2023 50
## 2 Ceratophyllum demersum 2023 8
## 3 Heracleum sphondylium 2023 1
## 4 Humulus lupulus 2023 1
## 5 Juncus effusus 2023 8
## 6 Lycopus europaeus 2023 2
## 7 Mentha aquatica 2023 2
## 8 Myosotis arvensis 2023 1
## 9 Nymphaea marliacea 2023 2
## 10 Phragmites australis 2023 278
## 11 Populus nigra 2023 2
## 12 Populus x canadensis 2023 0
## 13 Prunus serotina 2023 1
## 14 Quercus robur 2023 1
## 15 Rosa Subsec. Caninae 2023 1
## 16 Salix alba 2023 8
## 17 Salix cinerea 2023 331
## 18 Salix viminalis 2023 2
## 19 Salix x holosericea (S. cinerea x S. viminalis) 2023 1
## 20 Alnus glutinosa 2024 39
## 21 Ceratophyllum demersum 2024 9
## 22 Heracleum sphondylium 2024 3
## 23 Humulus lupulus 2024 2
## 24 Juncus effusus 2024 8
## 25 Lycopus europaeus 2024 1
## 26 Mentha aquatica 2024 0
## 27 Myosotis arvensis 2024 1
## 28 Nymphaea marliacea 2024 2
## 29 Phragmites australis 2024 278
## 30 Populus nigra 2024 0
## 31 Populus x canadensis 2024 2
## 32 Prunus serotina 2024 0
## 33 Quercus robur 2024 2
## 34 Rosa Subsec. Caninae 2024 0
## 35 Salix alba 2024 10
## 36 Salix cinerea 2024 341
## 37 Salix viminalis 2024 2
## 38 Salix x holosericea (S. cinerea x S. viminalis) 2024 0
df <- df_long
# Summarize: total coverage per species per year
summary_df <- df %>%
group_by(name, year) %>%
summarise(total_coverage = sum(coverage), .groups = "drop")
# Define species to isolate
focus_species <- c("Salix alba", "Salix cinerea", "Alnus glutinosa", "Phragmites australis")
# Create two subsets
df_focus <- summary_df %>% filter(name %in% focus_species)
df_other <- summary_df %>% filter(!name %in% focus_species)
# Order species alphabetically
df_focus$name <- factor(df_focus$name, levels = sort(unique(df_focus$name)))
df_other$name <- factor(df_other$name, levels = sort(unique(df_other$name)))
# Plot for other species (horizontal)
plot_other <- ggplot(df_other, aes(x = name, y = total_coverage, fill = factor(year))) +
geom_bar(stat = "identity", position = "dodge") +
coord_flip() +
labs(
title = "Species Coverage by Year (Smaller abundances)",
x = "Species",
y = "Total Coverage",
fill = "Year"
) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
# Plot for focus species (horizontal)
plot_focus <- ggplot(df_focus, aes(x = name, y = total_coverage, fill = factor(year))) +
geom_bar(stat = "identity", position = "dodge") +
coord_flip() +
labs(
title = "Species Coverage by Year (Large abundances)",
x = "Species",
y = "Total Coverage",
fill = "Year"
) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5),
axis.text.y = element_text(face = "italic"))
# Show both
print(plot_focus)
print(plot_other)
df <- read_excel("C:/Users/Arne/Downloads/Waarnemingen libellen (1).xlsx", sheet = "Sheet10")
# Count unique species per zone per year
zone_species_counts <- df %>%
filter(coverage > 0) %>% # only count species that are actually present
group_by(zone, year) %>%
summarise(species_count = n_distinct(name), .groups = "drop")
# Make zone a factor for clean ordering
zone_species_counts$zone <- factor(zone_species_counts$zone, levels = sort(unique(zone_species_counts$zone)))
# Plot
plot_zone <- ggplot(zone_species_counts, aes(x = zone, y = species_count, fill = factor(year))) +
geom_bar(stat = "identity", position = position_dodge(width = 0.9)) +
scale_y_continuous(
limits = c(0, 10),
breaks = 0:12) +
labs(
title = "Species Richness per Zone by Year",
x = "Zone",
y = "Number of Unique Species",
fill = "Year"
) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
plot_zone
# Count unique families per zone per year
zone_family_counts <- df %>%
filter(coverage > 0) %>% # only count species that are actually present
group_by(zone, family, year) %>%
summarise(family_count = n_distinct(family), .groups = "drop")
# Aggregate to get family count per zone per year
zone_family_counts_summary <- zone_family_counts %>%
group_by(zone, year) %>%
summarise(family_count = n_distinct(family), .groups = "drop")
# Make zone a factor for clean ordering
zone_family_counts_summary$zone <- factor(zone_family_counts_summary$zone, levels = sort(unique(zone_family_counts_summary$zone)))
# Plot
plot_zone_family <- ggplot(zone_family_counts_summary, aes(x = zone, y = family_count, fill = factor(year))) +
geom_bar(stat = "identity", position = position_dodge(width = 0.9)) +
scale_y_continuous(
limits = c(0, 8),
breaks = 0:8 # only whole numbers from 0 to 12
) +
labs(
title = "Family count per zone by year",
x = "Zone",
y = "Number of unique families",
fill = "Year"
) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
# Show plot
print(plot_zone_family)
# Count unique species per zone per year
zone_species_counts <- df %>%
filter(coverage > 0) %>% # only count species that are actually present
group_by(zone, year) %>%
summarise(species_count = n_distinct(name), .groups = "drop")
# Make zone a factor for clean ordering
zone_species_counts$zone <- factor(zone_species_counts$zone, levels = sort(unique(zone_species_counts$zone)))
# Plot
plot_zone_species <- ggplot(zone_species_counts, aes(x = zone, y = species_count, fill = factor(year))) +
geom_bar(stat = "identity", position = position_dodge(width = 0.9)) +
scale_y_continuous(
limits = c(0, 10),
breaks = 0:10 # only whole numbers from 0 to 12
) +
labs(
title = "Unique species count per zone per year",
x = "Zone",
y = "Number of unique species",
fill = "Year"
) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
# Show plot
print(plot_zone_species)
library(readxl)
library(dplyr)
library(tidyr)
library(writexl)
## Warning: package 'writexl' was built under R version 4.4.3
# Read the data
df <- read_excel("C:/Users/Arne/Downloads/Waarnemingen libellen (1).xlsx", sheet = "Sheet10")
# Summarize and make the data long
df_summary <- df %>%
group_by(name, zone, year) %>%
summarise(coverage = sum(coverage), .groups = "drop")
# Create the long format dataframe
df_long <- df_summary %>%
arrange(zone, year, name) # Sort the data for better presentation
# Write the long-format data to an Excel file
write_xlsx(df_long, "species_per_zone_year.xlsx")
# Show the resulting dataframe in the console
print(df_long)
## # A tibble: 58 × 4
## name zone year coverage
## <chr> <dbl> <dbl> <dbl>
## 1 Alnus glutinosa 1 2023 30
## 2 Salix cinerea 1 2023 70
## 3 Alnus glutinosa 1 2024 24
## 4 Salix alba 1 2024 1
## 5 Salix cinerea 1 2024 75
## 6 Ceratophyllum demersum 2 2023 5
## 7 Humulus lupulus 2 2023 1
## 8 Mentha aquatica 2 2023 2
## 9 Nymphaea marliacea 2 2023 2
## 10 Phragmites australis 2 2023 90
## # ℹ 48 more rows
library(ggplot2)
library(dplyr)
library(tidyr)
df_presence <- df %>%
mutate(present = 1) %>%
dplyr::select(name, zone, year, present) %>%
tidyr::pivot_wider(names_from = year, values_from = present, values_fill = 0)
# Adjust for actual year names (2023 and 2024)
df_presence <- df_presence %>%
mutate(
presence_combined = case_when(
`2023` == 1 & `2024` == 1 ~ "Both",
`2023` == 1 ~ "Year 2023",
`2024` == 1 ~ "Year 2024",
TRUE ~ "None"
)
)
# Reshape for plotting (x = zone, y = species)
plot_data <- df_presence %>%
dplyr::select(name, zone, presence_combined)
# Plot
gg <- ggplot(plot_data, aes(x = factor(zone), y = name, fill = presence_combined)) +
geom_tile(color = "white") +
scale_fill_manual(values = c("Year 2023" = "blue", "Year 2024" = "red", "Both" = "purple", "None" = "white")) +
labs(
title = "Species Presence per Zone and Year",
x = "Zone",
y = "Species",
fill = "Presence"
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, size = 12),
axis.text.y = element_text(face = "italic")
)
gg
# Load necessary libraries
library(dplyr)
library(tidyr)
library(vegan) # For PCoA (cmdscale) and distance calculations
library(ggplot2)
library(RColorBrewer)
# Read and prepare the data (assuming 'df' is already loaded with your data)
# Summarize species coverage per zone and year
df_summary <- df %>%
group_by(name, zone, year) %>%
summarise(coverage = sum(coverage), .groups = "drop")
# Create a pivot table: Species as columns, zones and years as rows
df_pivot <- df_summary %>%
spread(key = name, value = coverage, fill = 0) %>%
dplyr::select(-zone, -year)
dist_matrix <- vegdist(df_pivot, method = "euclidean")
# Perform Principal Coordinates Analysis (PCoA)
pcoa_result <- cmdscale(dist_matrix, k = 2) # k = 2 for 2-dimensional PCoA
pcoa_df <- as.data.frame(pcoa_result)
# Add zone and year information to the PCoA results
df_summary_zone_year <- df_summary %>%
dplyr::select(zone, year) %>%
distinct()
pcoa_df$zone <- df_summary_zone_year$zone
pcoa_df$year <- df_summary_zone_year$year
# Plot the PCoA results
ggplot(pcoa_df, aes(x = V1, y = V2, color = factor(zone), shape = factor(year))) +
geom_point(size = 3) +
scale_color_manual(values = RColorBrewer::brewer.pal(7, "Set1")) + # Custom colors for zones
labs(
title = "PCoA of species composition across zones and years (Euclidian, Abundance excluded)",
x = "PCoA Axis 1",
y = "PCoA Axis 2",
color = "Zone",
shape = "Year"
) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5, size = 12))
# Load necessary libraries
library(dplyr)
library(tidyr)
library(vegan) # For PCoA (cmdscale) and distance calculations
library(ggplot2)
library(RColorBrewer)
# Data summary
df_summary <- df %>%
group_by(name, zone, year) %>%
summarise(coverage = sum(coverage), .groups = "drop")
# Create a pivot table: Species as columns, zones and years as rows
df_pivot <- df_summary %>%
spread(key = name, value = coverage, fill = 0) %>%
dplyr::select(-zone, -year)
# Optional: Scale the data before calculating distances (important for Euclidean distance)
df_pivot_scaled <- scale(df_pivot) # Scaling the data to normalize species abundances
# Calculate Euclidean distance (including abundance, after scaling)
dist_matrix_euclidean <- dist(df_pivot_scaled, method = "euclidean")
# Perform PCoA for Euclidean distance (including abundance)
pcoa_result_euclidean <- cmdscale(dist_matrix_euclidean, k = 2) # k = 2 for 2-dimensional PCoA
pcoa_df_euclidean <- as.data.frame(pcoa_result_euclidean)
# Add zone and year information to the PCoA results
df_summary_zone_year <- df_summary %>%
dplyr::select(zone, year) %>%
distinct()
pcoa_df_euclidean$zone <- df_summary_zone_year$zone
pcoa_df_euclidean$year <- df_summary_zone_year$year
# Plot PCoA results for Euclidean distance (abundance included)
ggplot(pcoa_df_euclidean, aes(x = V1, y = V2, color = factor(zone), shape = factor(year))) +
geom_point(size = 3) +
scale_color_manual(values = RColorBrewer::brewer.pal(7, "Set1")) +
labs(
title = "PCoA of species composition across zones and years (Euclidean, Abundance included)",
x = "PCoA Axis 1",
y = "PCoA Axis 2",
color = "Zone",
shape = "Year"
) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5, size = 12))
HOPEFULLY ITS CLEAR THAT WE NOW GO BACK TO THE ODONATA AND LEAVE THE VEGETATION BEHIND!
library(readxl)
setwd("~/Luna")
Luna <- read_excel("Luna.xlsx")
df_wandel <- read_excel("Luna.xlsx", sheet = "Sheet8")
library(ggplot2)
library(dplyr)
library(tibble)
# Your data manually entered
df <- tribble(
~zone, ~abundance,
"zone 1", 74,
"zone 2", 170,
"zone 3", 279,
"zone 4", 141,
"zone 5", 114,
"zone 6", 79,
"zone 7", 26,
"", NA, # visual gap
"wandeling", 139
)
# Custom order including the gap
df$zone <- factor(df$zone, levels = df$zone)
# Plot with gap
ggplot(df, aes(x = zone, y = abundance)) +
geom_bar(stat = "identity", fill = "steelblue", na.rm = TRUE) +
labs(
title = "Total abundance per zone",
x = "Zone",
y = "Total Abundance"
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5),
axis.text.x = element_text(angle = 45, hjust = 1)
)
library(ggplot2)
library(dplyr)
library(tibble)
# Species richness data manually entered
df <- tribble(
~zone, ~richness,
"zone 1", 15,
"zone 2", 17,
"zone 3", 20,
"zone 4", 17,
"zone 5", 17,
"zone 6", 15,
"zone 7", 9,
"", NA, # visual gap
"wandeling", 19
)
# Preserve plotting order
df$zone <- factor(df$zone, levels = df$zone)
# Barplot
ggplot(df, aes(x = zone, y = richness)) +
geom_bar(stat = "identity", fill = "darkseagreen", na.rm = TRUE) +
labs(
title = "Species richness per zone",
x = "Zone",
y = "Species Richness"
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5),
axis.text.x = element_text(angle = 45, hjust = 1)
)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ purrr 1.0.2
## ✔ lubridate 1.9.3 ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ gridExtra::combine() masks dplyr::combine()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ car::recode() masks dplyr::recode()
## ✖ MASS::select() masks dplyr::select()
## ✖ purrr::some() masks car::some()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
df <- read_excel("Luna.xlsx", sheet = "Sheet12")
## New names:
## • `Zone "walk"` -> `Zone "walk"...2`
## • `Zone 1` -> `Zone 1...3`
## • `Zone 2` -> `Zone 2...4`
## • `Zone 3` -> `Zone 3...5`
## • `Zone 4` -> `Zone 4...6`
## • `Zone 5` -> `Zone 5...7`
## • `Zone 6` -> `Zone 6...8`
## • `Zone 7` -> `Zone 7...9`
## • `Zone "walk"` -> `Zone "walk"...10`
## • `Zone 1` -> `Zone 1...11`
## • `Zone 2` -> `Zone 2...12`
## • `Zone 3` -> `Zone 3...13`
## • `Zone 4` -> `Zone 4...14`
## • `Zone 5` -> `Zone 5...15`
## • `Zone 6` -> `Zone 6...16`
## • `Zone 7` -> `Zone 7...17`
# Convert to long format
df_long <- df %>%
pivot_longer(cols = starts_with("zone"), names_to = "zone", values_to = "abundance") %>%
mutate(zone = str_replace(zone, "zone", ""))
# Plot
ggplot(df_long, aes(x = factor(zone), y = fct_rev(species), fill = abundance)) +
geom_tile(color = "white") +
scale_fill_gradient(low = "blue", high = "red", na.value = "white") +
labs(
title = "Species abundance per zone",
x = "Zone",
y = "Species",
fill = "Abundance"
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, size = 14),
axis.text.y = element_text(face = "italic")
)
Groen is aanwezig - rood is afwezig
library(tidyverse)
df <- read_excel("Luna.xlsx", sheet = "Sheet13")
# Convert to long format
df_long <- df %>%
pivot_longer(cols = starts_with("zone"), names_to = "zone", values_to = "abundance") %>%
mutate(zone = str_replace(zone, "zone", ""))
# Plot
ggplot(df_long, aes(x = factor(zone), y = fct_rev(species), fill = abundance)) +
geom_tile(color = "white") +
scale_fill_gradient(low = "red", high = "green", na.value = "white") +
labs(
title = "Species abundance per zone",
x = "Zone",
y = "Species",
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, size = 14),
axis.text.y = element_text(face = "italic"),
legend.position = "none"
)
df <- read_excel("Luna.xlsx", sheet = "Sheet14")
# Load libraries
library(readxl)
library(tidyverse)
library(vegan)
# Step 2: Clean and prep the data
# We'll aggregate abundance per species per zone
df_clean <- df %>%
filter(!is.na(notes)) %>%
mutate(zone = notes) %>%
group_by(scientific, zone) %>%
summarise(abundance = sum(number), .groups = "drop") %>%
pivot_wider(names_from = zone, values_from = abundance, values_fill = 0)
# Step 3: Make the species names row names (vegan prefers this)
df_matrix <- df_clean %>%
column_to_rownames(var = "scientific")
# Step 4: Compute distance matrix (e.g., Bray-Curtis)
dist_matrix <- vegdist(df_matrix, method = "bray")
# Step 5: Perform PCoA
pcoa_result <- cmdscale(dist_matrix, k = 2, eig = TRUE)
# Step 6: Plot the results
pcoa_df <- as.data.frame(pcoa_result$points)
pcoa_df$species <- rownames(pcoa_df)
ggplot(pcoa_df, aes(x = V1, y = V2, label = species)) +
geom_point() +
geom_text(vjust = -0.5, hjust = 0.5, size = 3) +
labs(
title = "PCoA of Species Abundance by Zone",
x = "PCoA Axis 1",
y = "PCoA Axis 2"
) +
theme_minimal()
# Load libraries
library(readxl)
library(tidyverse)
library(vegan)
# Step 2: Prepare the data — zones as rows, species as columns
df_clean <- df %>%
filter(!is.na(notes)) %>%
group_by(notes, scientific) %>%
summarise(abundance = sum(number), .groups = "drop") %>%
pivot_wider(names_from = scientific, values_from = abundance, values_fill = 0)
# Step 3: Set zones as row names
df_matrix <- df_clean %>%
column_to_rownames(var = "notes")
# Step 4: Compute distance matrix (e.g., Bray-Curtis)
dist_matrix <- vegdist(df_matrix, method = "bray")
# Step 5: Perform PCoA
pcoa_result <- cmdscale(dist_matrix, k = 2, eig = TRUE)
# Step 6: Plot
pcoa_df <- as.data.frame(pcoa_result$points)
pcoa_df$zone <- rownames(pcoa_df)
ggplot(pcoa_df, aes(x = V1, y = V2, label = zone)) +
geom_point(size = 3, color = "steelblue") +
geom_text(vjust = -0.5, size = 3) +
labs(
title = "PCoA of Zones Based on Species Composition",
x = "PCoA Axis 1",
y = "PCoA Axis 2"
) +
theme_minimal()
df <- read_excel("Luna.xlsx", sheet = "Sheet15")
sex_summary <- df %>%
group_by(sex) %>%
summarise(total = sum(number), .groups = "drop")
ggplot(sex_summary, aes(x = sex, y = total, fill = sex)) +
geom_col(width = 0.6) +
scale_fill_manual(values = c("M" = "#1f77b4", "F" = "#ff7f0e", "U" = "#999999")) +
labs(
title = "Abundance per sex",
x = "Sex",
y = "Number of individuals"
) +
theme_minimal()
library(readxl)
library(tidyverse)
df %>%
group_by(date, sex) %>%
summarise(total = sum(number), .groups = "drop") %>%
ggplot(aes(x = date, y = total, fill = sex)) +
geom_col(position = position_dodge(width = 0.7), width = 0.6) +
scale_fill_manual(values = c("M" = "#1f77b4", "F" = "#ff7f0e", "U" = "#999999")) +
labs(
title = "Abundance per sex and date",
x = "Date",
y = "Number of individuals",
fill = "Sex"
) +
theme_minimal()
df %>%
group_by(date, sex) %>%
summarise(total = sum(number), .groups = "drop") %>%
ggplot(aes(x = date, y = total)) +
geom_col(aes(fill = sex), position = position_dodge(width = 0.7), width = 0.6) +
scale_fill_manual(values = c("M" = "#1f77b4", "F" = "#ff7f0e", "U" = "#999999")) +
labs(
title = "Abundance per sex and date",
x = "Date",
y = "Number of individuals",
fill = "Sex",
color = "Sex"
) +
facet_wrap(~ sex, scales = "free_y") + # Create separate plots for each sex
theme_minimal() +
theme(
legend.position = "none", # Optionally remove legend for cleaner output
axis.text.x = element_text(angle = 90, hjust = 1) # Rotate x-axis labels by 90 degrees
)
library(readxl)
library(tidyr)
library(dplyr)
library(ggplot2)
df <- read_excel("Luna.xlsx", sheet = "Sheet18")
# Rename the first column to 'dragonfly' (if needed)
colnames(df)[1] <- "dragonfly"
# Reshape from wide to long format
df_long <- df %>%
pivot_longer(
cols = -dragonfly,
names_to = "plant",
values_to = "count"
) %>%
filter(!is.na(count) & count > 0)
# Create dot plot (like a checklist)
ggplot(df_long, aes(x = plant, y = dragonfly)) +
geom_point(shape = 21, fill = "#1f77b4", size = 4, stroke = 0.5) +
labs(
title = "Odonata Species Associated with Plant Species",
x = "Plant Species",
y = "Odonata Species"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
)
library(readxl)
library(tidyverse)
# Rename first column to 'odonata' for clarity
df <- df %>%
rename(odonata = 1)
# Convert to long format
df_long <- df %>%
pivot_longer(
cols = -odonata,
names_to = "plant",
values_to = "abundance"
)
# Plot as heatmap-style tile plot
ggplot(df_long, aes(x = plant, y = fct_rev(odonata), fill = abundance)) +
geom_tile(color = "white") +
scale_fill_gradient(low = "blue", high = "red", na.value = "white") +
labs(
title = "Odonata Species Abundance per Plant Species",
x = "Plant Species",
y = "Odonata Species",
fill = "Abundance"
) +
theme(
plot.title = element_text(hjust = 0.5, size = 14),
axis.text.x = element_text(angle = 45, hjust = 1),
axis.text.y = element_text(face = "italic", size = 8)
)
library(readxl)
library(tidyverse)
# Rename first column to 'odonata'
df <- df %>%
rename(odonata = 1)
# Convert to long format and remove NAs
df_long <- df %>%
pivot_longer(
cols = -odonata,
names_to = "plant",
values_to = "abundance"
) %>%
filter(!is.na(abundance))
# Plot the tile map
ggplot(df_long, aes(x = plant, y = fct_rev(odonata), fill = abundance)) +
geom_tile(color = "white") +
scale_fill_gradient(low = "blue", high = "red") +
labs(
title = "Odonata Species Abundance per Plant Species",
x = "Plant Species",
y = "Odonata Species",
fill = "Abundance"
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, size = 14),
axis.text.x = element_text(angle = 45, hjust = 1),
axis.text.y = element_text(face = "italic", size = 8)
)
df <- read_excel("Luna.xlsx", sheet = "Sheet20")
library(readxl)
library(tidyverse)
library(lubridate)
# Convert 'time' column to proper time format (assuming it's HH:MM:SS)
df <- df %>%
mutate(time = as.POSIXct(time, format = "%H:%M:%S"))
ggplot(df, aes(x = time, y = number)) +
geom_point(size = 2, alpha = 0.8, color = "#1f77b4") +
geom_smooth(method = "loess", color = "#08306b", se = FALSE, size = 1.5) +
scale_x_datetime(date_labels = "%H:%M") +
scale_y_continuous(breaks = scales::pretty_breaks(n = 5)) +
labs(
title = "Abundance Over Time",
x = "Time of Day",
y = "Number of Individuals"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).