library(vegan)
## Loading required package: permute
## Loading required package: lattice
library(ggpubr)
## Loading required package: ggplot2
library(ggplot2)

Here we are going to show and calculate the species distribution

library(readxl)   
library(ggplot2)  
library(dplyr)    
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
par(mfrow = c(1,1))
setwd("~/Luna")
df <- read_excel("Luna.xlsx", sheet = "Sheet22")    

df_count <- df

speciescount <- ggplot(df_count, aes(x = amount, y = reorder(scientific, amount))) +  
  geom_bar(stat = "identity", fill = "steelblue") +  
  labs(x = "Number of abundance", y = "", title = "Diversity (Speciesand abundance of Odonata)") +  
  theme_minimal() +
  theme(axis.text.y = element_text(face = "italic"))

speciescount

Above here we see the distribution of all Odonata species found during our experiment. On the y-axis of this barplot we see the scientific name of the species, and on the x-axis the amount of times this species was found, Note that the y-axis is sorted based on abundance.

And now the smae for the family:

df <- read_excel("Luna.xlsx", sheet = "Data export")    

df_count <- df %>%  
  count(family, name = "count")    

familycount <- ggplot(df_count, aes(x = count, y = reorder(family, count))) +  
  geom_bar(stat = "identity", fill = "steelblue") +  
  labs(x = "Number of abundance", y = "Family name", title = "Distribution of Families") +  
  theme_minimal() +
  theme(axis.text.y = element_text(face = "italic"))

familycount

The description is the same as the one in the figure above.

Next up, we will calculate the species evenness metrics. Using the Shannon-Wiener index, as well as the Simpson index and it’s reverse form.

library(dplyr)  
library(dplyr)  

data <- data.frame(
  species = c("Aeshna cyanea", "Aeshna grandis", "Aeshna isoceles", "Aeshna mixta", "Anax imperator", "Anax parthenope", 
              "Calopteryx splendens", "Calopteryx virgo", "Chalcolestes viridis", "Coenagrion puella", "Coenagrion pulchellum", 
              "Coenagrion scitulum", "Enallagma cyathigerum", "Erythromma viridulum", "Gomphus pulchellus", "Ischnura elegans", 
              "Libellula depressa", "Libellula quadrimaculata", "Orthetrum cancellatum", "Orthetrum coerulescens", 
              "Platycnemis pennipes", "Pyrrhosoma nymphula", "Sympetrum sanguineum", "Sympetrum striolatum", "Sympetrum vulgatum"),
  count = c(7, 18, 2, 38, 46, 11, 8, 25, 13, 132, 8, 1, 192, 13, 7, 131, 20, 7, 60, 122, 40, 23, 88, 5, 5)
)

total <- sum(data$count)  
p <- data$count / total  

shannon_index <- -sum(p * log(p))  
simpson_index <- sum(p^2)  
inverse_simpson <- 1 / simpson_index  

shannon_index  
## [1] 2.595379
simpson_index  
## [1] 0.1010106
inverse_simpson  
## [1] 9.899947
data <- data.frame(index = c("Shannon", "Simpson", "InverseSimpson"), values = c(2.593792, 0.1010106, 9.899947))
data
##            index    values
## 1        Shannon 2.5937920
## 2        Simpson 0.1010106
## 3 InverseSimpson 9.8999470

This leads us to this data table, where we can see a very low Simpson index of about 0.10. Indicating little diversity in our species range. A Shannon index of about 2.6 doenn’t give a terrible evenness, but not great either. This probably means that at least a couple of species are dominating the sample pool.

library(ggplot2)
library(vegan)

# Sample data
sample_moment <- c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
cumulative_species <- c(0, 11, 15, 18, 19, 22, 22, 23, 23, 23, 23, 25, 25, 25, 25, 25)

species_matrix <- matrix(0, nrow = length(sample_moment), ncol = max(cumulative_species))
for (i in 1:length(sample_moment)) {
  species_matrix[i, 1:cumulative_species[i]] <- 1
}

accum_model <- specaccum(species_matrix, method = "random")

future_moments <- 1:21

predicted_species <- accum_model$richness
predicted_species <- c(predicted_species, rep(predicted_species[length(predicted_species)], length(future_moments) - length(predicted_species)))

plot_data <- data.frame(Sample_Moment = sample_moment, Cumulative_Species = cumulative_species)
future_data <- data.frame(Sample_Moment = future_moments, Predicted = predicted_species)

ggplot() +
  geom_line(data = future_data, aes(x = Sample_Moment, y = Predicted), color = "green", linetype = "dashed", size = 1) +
  geom_line(data = plot_data, aes(x = Sample_Moment, y = Cumulative_Species), color = "hotpink", size = 1) +
  geom_point(data = plot_data, aes(x = Sample_Moment, y = Cumulative_Species), color = "coral", size = 2) +
  labs(
    title = "Rarefaction Curve with Predicted Species Accumulation",
    x = "Sampling Effort (Moment)",
    y = "Cumulative Species Found"
  ) +
  theme_minimal()

Here we get our rarefraction curve, with a theoretical predicted line (green) based on our species found, showing an exponential growth of species. It looks like an increase in sampling effort wouldn’t lead to many new species. Although in this province we have a rich Odonata fauna. Coming to 58-59 species in the last 3 years (waarnemingen.be)

library(ggplot2)
library(vegan)

sample_moment <- c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
cumulative_species <- c(0, 11, 15, 18, 19, 22, 22, 23, 23, 23, 23, 25, 25, 25, 25, 25)

loess_model <- loess(cumulative_species ~ sample_moment)

correlation <- cor(sample_moment, cumulative_species)

lm_model <- lm(cumulative_species ~ sample_moment)
p_value <- summary(lm_model)$coefficients[2, 4]

ggplot(data = data.frame(Sample_Moment = sample_moment, Cumulative_Species = cumulative_species), aes(x = Sample_Moment, y = Cumulative_Species)) +
  geom_point(color = "coral", size = 2) + 
  geom_smooth(method = "loess", color = "hotpink", linetype = "solid", size = 1, se = TRUE) + 
  labs(
    title = "Rarefaction Curve with LOESS Fit",
    x = "Sampling Effort (Moment)",
    y = "Cumulative Species Found"
  ) +
  annotate("text", x = 10, y = 17, label = paste("Pearson r =", round(correlation, 2)), color = "black", size = 5) +
  annotate("text", x = 10, y = 15, label = paste("p-value =", round(p_value, 4)), color = "black", size = 5) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

Here we fitted the same data, but we included Pearson correlation and a p-value. We see a high correlation in our LOESS-fitted line, indicating a pattern of exponential increase of our data. The p-value shows the significance of this fit. (But I think since we fitted this regression ourselves we can ignore this number, even though it is significant.)

Now we do an ANOVA with our environmental variables to see the effect on species

library(readxl)
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(ggplot2)

data <- read_excel("ANOVA-time.xlsx")
head(data)
## # A tibble: 6 × 7
##   TemperatureMean CloudCover  Wind WindDirection Humidity Pressure Species
##             <dbl>      <dbl> <dbl>         <dbl>    <dbl>    <dbl>   <dbl>
## 1            17.5         50    20           180       52     1005      11
## 2            21.5         33    18           270       66     1015      15
## 3            19           50    18           270       71     1012      13
## 4            26           10     6           180       45     1021      11
## 5            24.5         60    11           270       50     1013      16
## 6            24.5          0     4             0       65     1012      11
full_model <- lm(Species ~ TemperatureMean + CloudCover + Wind + WindDirection + Humidity + Pressure, data = data)

vif_values <- vif(full_model)
print(vif_values)
## TemperatureMean      CloudCover            Wind   WindDirection        Humidity 
##        1.844767        2.283599        1.791559        2.782105        3.345319 
##        Pressure 
##        2.908007
null_model <- lm(Species ~ 1, data = data)

forward_model <- step(null_model, direction = "forward", scope = formula(full_model))
## Start:  AIC=45.67
## Species ~ 1
## 
##                   Df Sum of Sq    RSS    AIC
## + TemperatureMean  1   146.144 129.59 36.345
## + Humidity         1   112.802 162.93 39.779
## + Pressure         1    65.205 210.53 43.624
## + WindDirection    1    55.264 220.47 44.316
## <none>                         275.73 45.671
## + CloudCover       1    19.778 255.96 46.554
## + Wind             1     5.613 270.12 47.362
## 
## Step:  AIC=36.34
## Species ~ TemperatureMean
## 
##                 Df Sum of Sq     RSS    AIC
## + Wind           1   30.2322  99.357 34.360
## + Humidity       1   16.3254 113.264 36.325
## <none>                       129.589 36.345
## + WindDirection  1   16.1713 113.418 36.345
## + Pressure       1    7.3461 122.243 37.469
## + CloudCover     1    1.6536 127.935 38.152
## 
## Step:  AIC=34.36
## Species ~ TemperatureMean + Wind
## 
##                 Df Sum of Sq    RSS    AIC
## + Pressure       1   30.3995 68.957 30.882
## + Humidity       1   12.7124 86.645 34.306
## <none>                       99.357 34.360
## + WindDirection  1    8.0253 91.332 35.097
## + CloudCover     1    2.1478 97.209 36.032
## 
## Step:  AIC=30.88
## Species ~ TemperatureMean + Wind + Pressure
## 
##                 Df Sum of Sq    RSS    AIC
## <none>                       68.957 30.882
## + CloudCover     1   2.11125 66.846 32.415
## + Humidity       1   1.41633 67.541 32.570
## + WindDirection  1   0.16539 68.792 32.846
summary(forward_model)
## 
## Call:
## lm(formula = Species ~ TemperatureMean + Wind + Pressure, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6128 -0.5317  0.1811  0.7658  5.1689 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)   
## (Intercept)     -270.9968   118.1629  -2.293  0.04252 * 
## TemperatureMean    0.7214     0.1881   3.836  0.00276 **
## Wind               0.4403     0.1510   2.915  0.01405 * 
## Pressure           0.2585     0.1174   2.202  0.04990 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.504 on 11 degrees of freedom
## Multiple R-squared:  0.7499, Adjusted R-squared:  0.6817 
## F-statistic: 10.99 on 3 and 11 DF,  p-value: 0.001226
backward_model <- step(full_model, direction = "backward")
## Start:  AIC=35.25
## Species ~ TemperatureMean + CloudCover + Wind + WindDirection + 
##     Humidity + Pressure
## 
##                   Df Sum of Sq     RSS    AIC
## - WindDirection    1     2.987  64.850 33.960
## - CloudCover       1     3.736  65.600 34.133
## - Humidity         1     4.735  66.598 34.359
## <none>                          61.864 35.253
## - Pressure         1    24.465  86.328 38.252
## - Wind             1    35.653  97.516 40.080
## - TemperatureMean  1    52.504 114.368 42.471
## 
## Step:  AIC=33.96
## Species ~ TemperatureMean + CloudCover + Wind + Humidity + Pressure
## 
##                   Df Sum of Sq     RSS    AIC
## - Humidity         1     1.996  66.846 32.415
## - CloudCover       1     2.691  67.541 32.570
## <none>                          64.850 33.960
## - Pressure         1    21.713  86.563 36.292
## - Wind             1    32.784  97.635 38.098
## - TemperatureMean  1    61.270 126.120 41.938
## 
## Step:  AIC=32.42
## Species ~ TemperatureMean + CloudCover + Wind + Pressure
## 
##                   Df Sum of Sq     RSS    AIC
## - CloudCover       1     2.111  68.957 30.882
## <none>                          66.846 32.415
## - Pressure         1    30.363  97.209 36.032
## - Wind             1    42.481 109.328 37.794
## - TemperatureMean  1    93.870 160.716 43.574
## 
## Step:  AIC=30.88
## Species ~ TemperatureMean + Wind + Pressure
## 
##                   Df Sum of Sq     RSS    AIC
## <none>                          68.957 30.882
## - Pressure         1    30.400  99.357 34.360
## - Wind             1    53.286 122.243 37.469
## - TemperatureMean  1    92.259 161.217 41.620
summary(backward_model)
## 
## Call:
## lm(formula = Species ~ TemperatureMean + Wind + Pressure, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6128 -0.5317  0.1811  0.7658  5.1689 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)   
## (Intercept)     -270.9968   118.1629  -2.293  0.04252 * 
## TemperatureMean    0.7214     0.1881   3.836  0.00276 **
## Wind               0.4403     0.1510   2.915  0.01405 * 
## Pressure           0.2585     0.1174   2.202  0.04990 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.504 on 11 degrees of freedom
## Multiple R-squared:  0.7499, Adjusted R-squared:  0.6817 
## F-statistic: 10.99 on 3 and 11 DF,  p-value: 0.001226

Which results in a model, forward and backwards, that suggest that the mean temperature of the afternoon, wind, and air pressure are contributing the most to the species found. Wind speed, humidity, and clour cover dont seem to do much. The last one is suprising to me!

Now for abundancy

data <- read_excel("ANOVA-time2.xlsx")

full_model <- lm(Abundancy ~ TemperatureMean + CloudCover + Wind + WindDirection + Humidity + Pressure, data = data)

vif_values <- vif(full_model)
print(vif_values)
## TemperatureMean      CloudCover            Wind   WindDirection        Humidity 
##        1.844767        2.283599        1.791559        2.782105        3.345319 
##        Pressure 
##        2.908007
null_model <- lm(Abundancy ~ 1, data = data)

forward_model <- step(null_model, direction = "forward", scope = formula(full_model))
## Start:  AIC=114.29
## Abundancy ~ 1
## 
##                   Df Sum of Sq   RSS    AIC
## + TemperatureMean  1   11908.0 14844 107.46
## + Humidity         1    9475.4 17276 109.74
## + CloudCover       1    6354.7 20397 112.23
## + Pressure         1    3469.3 23283 114.21
## <none>                         26752 114.30
## + WindDirection    1    3203.4 23548 114.38
## + Wind             1     354.4 26397 116.09
## 
## Step:  AIC=107.46
## Abundancy ~ TemperatureMean
## 
##                 Df Sum of Sq   RSS    AIC
## <none>                       14844 107.46
## + Humidity       1   1469.75 13374 107.89
## + CloudCover     1   1090.13 13754 108.31
## + WindDirection  1    639.92 14204 108.80
## + Pressure       1     74.23 14770 109.38
## + Wind           1     66.54 14777 109.39
summary(forward_model)
## 
## Call:
## lm(formula = Abundancy ~ TemperatureMean, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -66.484 -19.712   2.362  18.748  66.671 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)   
## (Intercept)      -75.525     45.332  -1.666  0.11961   
## TemperatureMean    7.231      2.239   3.229  0.00658 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 33.79 on 13 degrees of freedom
## Multiple R-squared:  0.4451, Adjusted R-squared:  0.4024 
## F-statistic: 10.43 on 1 and 13 DF,  p-value: 0.006585
backward_model <- step(full_model, direction = "backward")
## Start:  AIC=114.3
## Abundancy ~ TemperatureMean + CloudCover + Wind + WindDirection + 
##     Humidity + Pressure
## 
##                   Df Sum of Sq   RSS    AIC
## - WindDirection    1      27.8 12054 112.34
## - Wind             1     121.7 12148 112.45
## - Pressure         1     362.1 12389 112.75
## - Humidity         1     423.3 12450 112.82
## - CloudCover       1    1303.0 13330 113.85
## <none>                         12027 114.30
## - TemperatureMean  1    3216.0 15243 115.86
## 
## Step:  AIC=112.34
## Abundancy ~ TemperatureMean + CloudCover + Wind + Humidity + 
##     Pressure
## 
##                   Df Sum of Sq   RSS    AIC
## - Wind             1     187.0 12242 110.57
## - Pressure         1     345.9 12400 110.76
## - Humidity         1     909.1 12964 111.43
## - CloudCover       1    1277.4 13332 111.85
## <none>                         12054 112.34
## - TemperatureMean  1    3245.2 15300 113.91
## 
## Step:  AIC=110.57
## Abundancy ~ TemperatureMean + CloudCover + Humidity + Pressure
## 
##                   Df Sum of Sq   RSS    AIC
## - Pressure         1    552.45 12794 109.23
## - CloudCover       1   1094.46 13336 109.85
## - Humidity         1   1326.60 13568 110.11
## <none>                         12242 110.57
## - TemperatureMean  1   3105.83 15347 111.96
## 
## Step:  AIC=109.23
## Abundancy ~ TemperatureMean + CloudCover + Humidity
## 
##                   Df Sum of Sq   RSS    AIC
## - CloudCover       1    580.07 13374 107.89
## - Humidity         1    959.68 13754 108.31
## <none>                         12794 109.23
## - TemperatureMean  1   2944.47 15738 110.34
## 
## Step:  AIC=107.9
## Abundancy ~ TemperatureMean + Humidity
## 
##                   Df Sum of Sq   RSS    AIC
## - Humidity         1    1469.8 14844 107.46
## <none>                         13374 107.89
## - TemperatureMean  1    3902.3 17276 109.74
## 
## Step:  AIC=107.46
## Abundancy ~ TemperatureMean
## 
##                   Df Sum of Sq   RSS    AIC
## <none>                         14844 107.46
## - TemperatureMean  1     11908 26752 114.30
summary(backward_model)
## 
## Call:
## lm(formula = Abundancy ~ TemperatureMean, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -66.484 -19.712   2.362  18.748  66.671 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)   
## (Intercept)      -75.525     45.332  -1.666  0.11961   
## TemperatureMean    7.231      2.239   3.229  0.00658 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 33.79 on 13 degrees of freedom
## Multiple R-squared:  0.4451, Adjusted R-squared:  0.4024 
## F-statistic: 10.43 on 1 and 13 DF,  p-value: 0.006585

With our abundancy we see that only mean temperature has an influence. For good measure, we will also fit the species-model-coefficients into our species model to see the results.

lmmm <- lm(formula = Abundancy ~ TemperatureMean + Wind + Pressure, data = data)
summary(lmmm)
## 
## Call:
## lm(formula = Abundancy ~ TemperatureMean + Wind + Pressure, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -65.167 -18.005   4.328  18.764  66.911 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)     -708.0407  1719.4786  -0.412   0.6884  
## TemperatureMean    6.9554     2.7366   2.542   0.0274 *
## Wind               0.7807     2.1974   0.355   0.7291  
## Pressure           0.6206     1.7082   0.363   0.7232  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 36.43 on 11 degrees of freedom
## Multiple R-squared:  0.4542, Adjusted R-squared:  0.3053 
## F-statistic: 3.051 on 3 and 11 DF,  p-value: 0.07395

However, only the mean temperature stays significant, while the others are not significant, boasting p-values around 0.72

First we will load our dataset into R

# Load required libraries
library(ggplot2)
library(tidyr)
library(dplyr)

# Create the data frame (you can replace this with your actual data)
data <- data.frame(
  Date = as.Date(c("2024-06-15", "2024-07-10", "2024-07-16", "2024-07-18", "2024-07-25",
                   "2024-08-02", "2024-08-10", "2024-08-19", "2024-08-23", "2024-09-08",
                   "2024-09-15", "2024-09-24", "2024-10-10", "2024-10-12", "2024-10-26")),
  Aeshna_cyanea = c(0, 3, 0, 1, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0),
  Aeshna_grandis = c(0, 0, 1, 1, 4, 0, 6, 4, 2, 2, 8, 0, 0, 5, 3),
  Aeshna_isoceles = c(1, 5, 7, 3, 8, 5, 1, 4, 4, 1, 1, 3, 6, 6, 6),
  Aeshna_mixta = c(0, 1, 0, 0, 4, 3, 2, 2, 4, 1, 0, 0, 0, 2, 0),
  Anax_imperator = c(2, 1, 5, 3, 2, 0, 3, 1, 1, 3, 4, 6, 2, 0, 6),
  Anax_parthenope = c(0, 9, 11, 6, 2, 18, 3, 7, 9, 16, 9, 0, 0, 3, 0),
  Calopteryx_splendens = c(10, 13, 12, 10, 9, 26, 16, 14, 12, 24, 9, 0, 0, 3, 8),
  Calopteryx_virgo = c(3, 5, 8, 4, 1, 15, 2, 7, 2, 2, 5, 6, 2, 3, 4),
  Chalcolestes_viridis = c(4, 9, 5, 13, 21, 3, 37, 26, 26, 17, 2, 0, 0, 0, 3),
  Coenagrion_puella = c(2, 5, 8, 3, 1, 6, 16, 23, 12, 5, 1, 3, 3, 1, 2),
  Coenagrion_pulchellum = c(3, 9, 10, 3, 2, 31, 37, 14, 26, 17, 9, 0, 14, 9, 11),
  Coenagrion_scitulum = c(2, 6, 3, 4, 18, 9, 8, 23, 18, 11, 1, 0, 6, 8, 4),
  Enallagma_cyathigerum = c(0, 6, 5, 3, 2, 3, 3, 7, 26, 5, 1, 5, 3, 2, 4),
  Erythromma_viridulum = c(2, 11, 10, 1, 0, 6, 7, 18, 3, 3, 1, 2, 7, 5, 0),
  Gomphus_pulchellus = c(0, 6, 0, 1, 0, 9, 23, 3, 26, 5, 3, 0, 0, 0, 1),
  Ischnura_elegans = c(1, 3, 5, 3, 18, 7, 2, 2, 7, 11, 10, 3, 8, 3, 1),
  Libellula_depressa = c(2, 9, 3, 1, 4, 31, 23, 18, 26, 5, 1, 1, 0, 2, 3),
  Libellula_quadrimaculata = c(1, 7, 4, 4, 21, 15, 37, 5, 26, 12, 10, 0, 1, 0, 4),
  Orthetrum_cancellatum = c(3, 5, 10, 1, 2, 6, 7, 18, 12, 3, 2, 0, 4, 2, 4),
  Orthetrum_coerulescens = c(4, 6, 3, 2, 7, 6, 23, 26, 2, 17, 1, 6, 8, 1, 11),
  Platycnemis_pennipes = c(2, 2, 3, 3, 4, 9, 5, 18, 7, 5, 1, 2, 0, 1, 0),
  Pyrrhosoma_nymphula = c(6, 11, 6, 6, 18, 31, 5, 23, 8, 3, 5, 0, 9, 0, 3),
  Sympetrum_sanguineum = c(3, 5, 6, 13, 18, 6, 1, 2, 8, 11, 1, 0, 3, 4, 7),
  Sympetrum_striolatum = c(2, 6, 7, 1, 7, 8, 6, 4, 3, 5, 0, 0, 1, 2, 2),
  Sympetrum_vulgatum = c(0, 3, 0, 3, 7, 9, 4, 4, 2, 3, 1, 0, 0, 2, 4)
)

long_data <- data %>%
  pivot_longer(cols = -Date, names_to = "Species", values_to = "Count")

Then we will create barplots for every species

# Create the bar plots for each species without the legend
ggplot(long_data, aes(x = Date, y = Count, fill = Species)) +
  geom_bar(stat = "identity") +
  facet_wrap(~Species, scales = "free_y", ncol = 5) +
  labs(y = "Count", x = "Date") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "none")  # Remove the legend

Here with individual LOESS trendlines

# Create 25 species plots with trendlines for each species
ggplot(long_data, aes(x = Date, y = Count, fill = Species)) +
  geom_bar(stat = "identity", position = "stack") +  # Stacked bars for each species
  geom_smooth(method = "loess", color = "blue", se = FALSE, aes(group = 1)) +  # LOESS trendline
  labs(y = "Count", x = "Date") +
  facet_wrap(~Species, scales = "free_y", ncol = 5) +  # 25 plots in a 5x5 grid
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "none")
## `geom_smooth()` using formula = 'y ~ x'

As well as a lineplot for all species at once

# Create a single figure showing all species on one plot (Line plot) without legend
ggplot(long_data, aes(x = Date, y = Count, color = Species, group = Species)) +
  geom_line() + 
  geom_point() +
  labs(y = "Count", x = "Date") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "none")  # Remove the legend

And then we show a LOESS plot fitted to ALL species, to show the general trend of our abundancies

# Fit a trendline (LOESS) and plot it without individual species lines
ggplot(long_data, aes(x = Date, y = Count)) +
  geom_smooth(method = "loess", color = "blue", se = FALSE) +  # LOESS trendline without confidence interval shading
  labs(y = "Count", x = "Date") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
## `geom_smooth()` using formula = 'y ~ x'

Now instead of the species we will do the same for the families

library(readxl)
library(tidyr)
library(ggplot2)


data <- read_excel("Luna.xlsx", sheet = "Sheet5")

long_data <- pivot_longer(data, cols = -Family, names_to = "Date", values_to = "Count")


long_data$Family <- factor(long_data$Family, levels = c("Aeshnidae", "Calopterygidae", "Coenagrionidae", 
                                                        "Lestidae", "Libellulidae", "Platycnemididae", "Gomphidae"))

ggplot(long_data, aes(x = Date, y = Count)) +
  geom_bar(stat = "identity", position = "stack", aes(fill = Family)) +
  labs(y = "Count", x = "Date") +
  theme_minimal() +
  facet_wrap(~Family, scales = "free_y", ncol = 2, nrow = 4) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "none",
        strip.text = element_text(size = 12)) 

However this looks ugly , so here is Gomphidae seperately

data <- read_excel("Luna.xlsx", sheet = "Sheet6")

long_data <- pivot_longer(data, cols = -Family, names_to = "Date", values_to = "Count")


long_data$Family <- factor(long_data$Family, levels = c("Aeshnidae", "Calopterygidae", "Coenagrionidae", "Lestidae", "Libellulidae", "Platycnemididae"))

ggplot(long_data, aes(x = Date, y = Count)) +
  geom_bar(stat = "identity", position = "stack", aes(fill = Family)) +
  labs(y = "Count", x = "Date") +
  theme_minimal() +
  facet_wrap(~Family, scales = "free_y", ncol = 2, nrow = 4) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "none",
        strip.text = element_text(size = 12)) 

data <- read_excel("Luna.xlsx", sheet = "Sheet7")

long_data <- pivot_longer(data, cols = -Family, names_to = "Date", values_to = "Count")


long_data$Family <- factor(long_data$Family, levels = "Gomphidae")

ggplot(long_data, aes(x = Date, y = Count)) +
  geom_bar(stat = "identity", position = "stack", aes(fill = Family)) +
  labs(y = "Count", x = "Date") +
  ggtitle("Gomphidae") +  # Add custom title
  theme_minimal() +
  facet_wrap(~Family, scales = "free_y", ncol = 2, nrow = 4) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "none",
        strip.text = element_text(size = 0)) +
  theme(plot.title = element_text(hjust = 0.5, color = "black")) +  
  scale_fill_manual(values = rep("hotpink", length(unique(long_data$Family)))) 

# Load libraries
library(ggplot2)
library(dplyr)
library(readr)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
df <- read_excel("C:/Users/Arne/Downloads/Waarnemingen libellen (1).xlsx", sheet = "Sheet10")


df_summary <- df %>%
  group_by(name, year) %>%
  summarise(coverage = sum(coverage), .groups = "drop")


# Get all unique species and years
all_species <- unique(df_summary$name)
all_years <- unique(df_summary$year)

# Create a complete grid of all combinations
complete_grid <- expand.grid(name = all_species, year = all_years)

# Join with actual data and fill NAs with 0
df_long <- complete_grid %>%
  left_join(df_summary, by = c("name", "year")) %>%
  mutate(coverage = replace_na(coverage, 0))

# View the long-format data
print(df_long)
##                                               name year coverage
## 1                                  Alnus glutinosa 2023       50
## 2                           Ceratophyllum demersum 2023        8
## 3                            Heracleum sphondylium 2023        1
## 4                                  Humulus lupulus 2023        1
## 5                                   Juncus effusus 2023        8
## 6                                Lycopus europaeus 2023        2
## 7                                  Mentha aquatica 2023        2
## 8                                Myosotis arvensis 2023        1
## 9                               Nymphaea marliacea 2023        2
## 10                            Phragmites australis 2023      278
## 11                                   Populus nigra 2023        2
## 12                            Populus x canadensis 2023        0
## 13                                 Prunus serotina 2023        1
## 14                                   Quercus robur 2023        1
## 15                            Rosa Subsec. Caninae 2023        1
## 16                                      Salix alba 2023        8
## 17                                   Salix cinerea 2023      331
## 18                                 Salix viminalis 2023        2
## 19 Salix x holosericea (S. cinerea x S. viminalis) 2023        1
## 20                                 Alnus glutinosa 2024       39
## 21                          Ceratophyllum demersum 2024        9
## 22                           Heracleum sphondylium 2024        3
## 23                                 Humulus lupulus 2024        2
## 24                                  Juncus effusus 2024        8
## 25                               Lycopus europaeus 2024        1
## 26                                 Mentha aquatica 2024        0
## 27                               Myosotis arvensis 2024        1
## 28                              Nymphaea marliacea 2024        2
## 29                            Phragmites australis 2024      278
## 30                                   Populus nigra 2024        0
## 31                            Populus x canadensis 2024        2
## 32                                 Prunus serotina 2024        0
## 33                                   Quercus robur 2024        2
## 34                            Rosa Subsec. Caninae 2024        0
## 35                                      Salix alba 2024       10
## 36                                   Salix cinerea 2024      341
## 37                                 Salix viminalis 2024        2
## 38 Salix x holosericea (S. cinerea x S. viminalis) 2024        0
df <- df_long

# Summarize: total coverage per species per year
summary_df <- df %>%
  group_by(name, year) %>%
  summarise(total_coverage = sum(coverage), .groups = "drop")

# Define species to isolate
focus_species <- c("Salix alba", "Salix cinerea", "Alnus glutinosa", "Phragmites australis")

# Create two subsets
df_focus <- summary_df %>% filter(name %in% focus_species)
df_other <- summary_df %>% filter(!name %in% focus_species)

# Order species alphabetically
df_focus$name <- factor(df_focus$name, levels = sort(unique(df_focus$name)))
df_other$name <- factor(df_other$name, levels = sort(unique(df_other$name)))

# Plot for other species (horizontal)
plot_other <- ggplot(df_other, aes(x = name, y = total_coverage, fill = factor(year))) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  labs(
    title = "Species Coverage by Year (Smaller abundances)",
    x = "Species",
    y = "Total Coverage",
    fill = "Year"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

# Plot for focus species (horizontal)
plot_focus <- ggplot(df_focus, aes(x = name, y = total_coverage, fill = factor(year))) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  labs(
    title = "Species Coverage by Year (Large abundances)",
    x = "Species",
    y = "Total Coverage",
    fill = "Year"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5),
        axis.text.y = element_text(face = "italic"))

# Show both
print(plot_focus)

print(plot_other)

df <- read_excel("C:/Users/Arne/Downloads/Waarnemingen libellen (1).xlsx", sheet = "Sheet10")

# Count unique species per zone per year
zone_species_counts <- df %>%
  filter(coverage > 0) %>%  # only count species that are actually present
  group_by(zone, year) %>%
  summarise(species_count = n_distinct(name), .groups = "drop")

# Make zone a factor for clean ordering
zone_species_counts$zone <- factor(zone_species_counts$zone, levels = sort(unique(zone_species_counts$zone)))

# Plot
plot_zone <- ggplot(zone_species_counts, aes(x = zone, y = species_count, fill = factor(year))) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.9)) +
   scale_y_continuous(
    limits = c(0, 10),
    breaks = 0:12) + 
  labs(
    title = "Species Richness per Zone by Year",
    x = "Zone",
    y = "Number of Unique Species",
    fill = "Year"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

plot_zone

# Count unique families per zone per year
zone_family_counts <- df %>%
  filter(coverage > 0) %>%  # only count species that are actually present
  group_by(zone, family, year) %>%
  summarise(family_count = n_distinct(family), .groups = "drop")

# Aggregate to get family count per zone per year
zone_family_counts_summary <- zone_family_counts %>%
  group_by(zone, year) %>%
  summarise(family_count = n_distinct(family), .groups = "drop")

# Make zone a factor for clean ordering
zone_family_counts_summary$zone <- factor(zone_family_counts_summary$zone, levels = sort(unique(zone_family_counts_summary$zone)))

# Plot
plot_zone_family <- ggplot(zone_family_counts_summary, aes(x = zone, y = family_count, fill = factor(year))) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.9)) +
  scale_y_continuous(
    limits = c(0, 8),
    breaks = 0:8  # only whole numbers from 0 to 12
  ) +
  labs(
    title = "Family count per zone by year",
    x = "Zone",
    y = "Number of unique families",
    fill = "Year"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

# Show plot
print(plot_zone_family)

# Count unique species per zone per year
zone_species_counts <- df %>%
  filter(coverage > 0) %>%  # only count species that are actually present
  group_by(zone, year) %>%
  summarise(species_count = n_distinct(name), .groups = "drop")

# Make zone a factor for clean ordering
zone_species_counts$zone <- factor(zone_species_counts$zone, levels = sort(unique(zone_species_counts$zone)))

# Plot
plot_zone_species <- ggplot(zone_species_counts, aes(x = zone, y = species_count, fill = factor(year))) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.9)) +
  scale_y_continuous(
    limits = c(0, 10),
    breaks = 0:10  # only whole numbers from 0 to 12
  ) +
  labs(
    title = "Unique species count per zone per year",
    x = "Zone",
    y = "Number of unique species",
    fill = "Year"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

# Show plot
print(plot_zone_species)

library(readxl)
library(dplyr)
library(tidyr)
library(writexl)
## Warning: package 'writexl' was built under R version 4.4.3
# Read the data
df <- read_excel("C:/Users/Arne/Downloads/Waarnemingen libellen (1).xlsx", sheet = "Sheet10")

# Summarize and make the data long
df_summary <- df %>%
  group_by(name, zone, year) %>%
  summarise(coverage = sum(coverage), .groups = "drop")

# Create the long format dataframe
df_long <- df_summary %>%
  arrange(zone, year, name)  # Sort the data for better presentation

# Write the long-format data to an Excel file
write_xlsx(df_long, "species_per_zone_year.xlsx")

# Show the resulting dataframe in the console
print(df_long)
## # A tibble: 58 × 4
##    name                    zone  year coverage
##    <chr>                  <dbl> <dbl>    <dbl>
##  1 Alnus glutinosa            1  2023       30
##  2 Salix cinerea              1  2023       70
##  3 Alnus glutinosa            1  2024       24
##  4 Salix alba                 1  2024        1
##  5 Salix cinerea              1  2024       75
##  6 Ceratophyllum demersum     2  2023        5
##  7 Humulus lupulus            2  2023        1
##  8 Mentha aquatica            2  2023        2
##  9 Nymphaea marliacea         2  2023        2
## 10 Phragmites australis       2  2023       90
## # ℹ 48 more rows
library(ggplot2)
library(dplyr)
library(tidyr)



df_presence <- df %>%
  mutate(present = 1) %>%
  dplyr::select(name, zone, year, present) %>%
  tidyr::pivot_wider(names_from = year, values_from = present, values_fill = 0)


# Adjust for actual year names (2023 and 2024)
df_presence <- df_presence %>%
  mutate(
    presence_combined = case_when(
      `2023` == 1 & `2024` == 1 ~ "Both",
      `2023` == 1 ~ "Year 2023",
      `2024` == 1 ~ "Year 2024",
      TRUE ~ "None"
    )
  )

# Reshape for plotting (x = zone, y = species)
plot_data <- df_presence %>%
  dplyr::select(name, zone, presence_combined)

# Plot
gg <- ggplot(plot_data, aes(x = factor(zone), y = name, fill = presence_combined)) +
  geom_tile(color = "white") +
  scale_fill_manual(values = c("Year 2023" = "blue", "Year 2024" = "red", "Both" = "purple", "None" = "white")) +
  labs(
    title = "Species Presence per Zone and Year",
    x = "Zone",
    y = "Species",
    fill = "Presence"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 12),
    axis.text.y = element_text(face = "italic")
  )

gg

# Load necessary libraries
library(dplyr)
library(tidyr)
library(vegan)  # For PCoA (cmdscale) and distance calculations
library(ggplot2)
library(RColorBrewer)

# Read and prepare the data (assuming 'df' is already loaded with your data)
# Summarize species coverage per zone and year
df_summary <- df %>%
  group_by(name, zone, year) %>%
  summarise(coverage = sum(coverage), .groups = "drop")

# Create a pivot table: Species as columns, zones and years as rows
df_pivot <- df_summary %>%
  spread(key = name, value = coverage, fill = 0) %>%
  dplyr::select(-zone, -year)

dist_matrix <- vegdist(df_pivot, method = "euclidean")


# Perform Principal Coordinates Analysis (PCoA)
pcoa_result <- cmdscale(dist_matrix, k = 2)  # k = 2 for 2-dimensional PCoA
pcoa_df <- as.data.frame(pcoa_result)

# Add zone and year information to the PCoA results
df_summary_zone_year <- df_summary %>%
  dplyr::select(zone, year) %>%
  distinct()

pcoa_df$zone <- df_summary_zone_year$zone
pcoa_df$year <- df_summary_zone_year$year

# Plot the PCoA results
ggplot(pcoa_df, aes(x = V1, y = V2, color = factor(zone), shape = factor(year))) +
  geom_point(size = 3) +
  scale_color_manual(values = RColorBrewer::brewer.pal(7, "Set1")) +  # Custom colors for zones
  labs(
    title = "PCoA of species composition across zones and years (Euclidian, Abundance excluded)",
    x = "PCoA Axis 1",
    y = "PCoA Axis 2",
    color = "Zone",
    shape = "Year"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, size = 12))

# Load necessary libraries
library(dplyr)
library(tidyr)
library(vegan)  # For PCoA (cmdscale) and distance calculations
library(ggplot2)
library(RColorBrewer)

# Data summary
df_summary <- df %>%
  group_by(name, zone, year) %>%
  summarise(coverage = sum(coverage), .groups = "drop")

# Create a pivot table: Species as columns, zones and years as rows
df_pivot <- df_summary %>%
  spread(key = name, value = coverage, fill = 0) %>%
  dplyr::select(-zone, -year)

# Optional: Scale the data before calculating distances (important for Euclidean distance)
df_pivot_scaled <- scale(df_pivot)  # Scaling the data to normalize species abundances

# Calculate Euclidean distance (including abundance, after scaling)
dist_matrix_euclidean <- dist(df_pivot_scaled, method = "euclidean")

# Perform PCoA for Euclidean distance (including abundance)
pcoa_result_euclidean <- cmdscale(dist_matrix_euclidean, k = 2)  # k = 2 for 2-dimensional PCoA
pcoa_df_euclidean <- as.data.frame(pcoa_result_euclidean)

# Add zone and year information to the PCoA results
df_summary_zone_year <- df_summary %>%
  dplyr::select(zone, year) %>%
  distinct()

pcoa_df_euclidean$zone <- df_summary_zone_year$zone
pcoa_df_euclidean$year <- df_summary_zone_year$year

# Plot PCoA results for Euclidean distance (abundance included)
ggplot(pcoa_df_euclidean, aes(x = V1, y = V2, color = factor(zone), shape = factor(year))) +
  geom_point(size = 3) +
  scale_color_manual(values = RColorBrewer::brewer.pal(7, "Set1")) +
  labs(
    title = "PCoA of species composition across zones and years (Euclidean, Abundance included)",
    x = "PCoA Axis 1",
    y = "PCoA Axis 2",
    color = "Zone",
    shape = "Year"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, size = 12))

HOPEFULLY ITS CLEAR THAT WE NOW GO BACK TO THE ODONATA AND LEAVE THE VEGETATION BEHIND!

library(readxl)
setwd("~/Luna")
Luna <- read_excel("Luna.xlsx")

df_wandel <- read_excel("Luna.xlsx", sheet = "Sheet8")


library(ggplot2)
library(dplyr)
library(tibble)

# Your data manually entered
df <- tribble(
  ~zone,      ~abundance,
  "zone 1",     74,
  "zone 2",    170,
  "zone 3",    279,
  "zone 4",    141,
  "zone 5",    114,
  "zone 6",     79,
  "zone 7",     26,
  "",           NA,        # visual gap
  "wandeling", 139
)

# Custom order including the gap
df$zone <- factor(df$zone, levels = df$zone)

# Plot with gap
ggplot(df, aes(x = zone, y = abundance)) +
  geom_bar(stat = "identity", fill = "steelblue", na.rm = TRUE) +
  labs(
    title = "Total abundance per zone",
    x = "Zone",
    y = "Total Abundance"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

library(ggplot2)
library(dplyr)
library(tibble)

# Species richness data manually entered
df <- tribble(
  ~zone,      ~richness,
  "zone 1",     15,
  "zone 2",     17,
  "zone 3",     20,
  "zone 4",     17,
  "zone 5",     17,
  "zone 6",     15,
  "zone 7",      9,
  "",           NA,        # visual gap
  "wandeling",  19
)

# Preserve plotting order
df$zone <- factor(df$zone, levels = df$zone)

# Barplot
ggplot(df, aes(x = zone, y = richness)) +
  geom_bar(stat = "identity", fill = "darkseagreen", na.rm = TRUE) +
  labs(
    title = "Species richness per zone",
    x = "Zone",
    y = "Species Richness"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ purrr     1.0.2
## ✔ lubridate 1.9.3     ✔ stringr   1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ gridExtra::combine() masks dplyr::combine()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ car::recode()        masks dplyr::recode()
## ✖ MASS::select()       masks dplyr::select()
## ✖ purrr::some()        masks car::some()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
df <- read_excel("Luna.xlsx", sheet = "Sheet12")
## New names:
## • `Zone "walk"` -> `Zone "walk"...2`
## • `Zone 1` -> `Zone 1...3`
## • `Zone 2` -> `Zone 2...4`
## • `Zone 3` -> `Zone 3...5`
## • `Zone 4` -> `Zone 4...6`
## • `Zone 5` -> `Zone 5...7`
## • `Zone 6` -> `Zone 6...8`
## • `Zone 7` -> `Zone 7...9`
## • `Zone "walk"` -> `Zone "walk"...10`
## • `Zone 1` -> `Zone 1...11`
## • `Zone 2` -> `Zone 2...12`
## • `Zone 3` -> `Zone 3...13`
## • `Zone 4` -> `Zone 4...14`
## • `Zone 5` -> `Zone 5...15`
## • `Zone 6` -> `Zone 6...16`
## • `Zone 7` -> `Zone 7...17`
# Convert to long format
df_long <- df %>%
  pivot_longer(cols = starts_with("zone"), names_to = "zone", values_to = "abundance") %>%
  mutate(zone = str_replace(zone, "zone", ""))

# Plot
ggplot(df_long, aes(x = factor(zone), y = fct_rev(species), fill = abundance)) +
  geom_tile(color = "white") +
  scale_fill_gradient(low = "blue", high = "red", na.value = "white") +
  labs(
    title = "Species abundance per zone",
    x = "Zone",
    y = "Species",
    fill = "Abundance"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14),
    axis.text.y = element_text(face = "italic")
  )

Groen is aanwezig - rood is afwezig

library(tidyverse)
df <- read_excel("Luna.xlsx", sheet = "Sheet13")

# Convert to long format
df_long <- df %>%
  pivot_longer(cols = starts_with("zone"), names_to = "zone", values_to = "abundance") %>%
  mutate(zone = str_replace(zone, "zone", ""))

# Plot
ggplot(df_long, aes(x = factor(zone), y = fct_rev(species), fill = abundance)) +
  geom_tile(color = "white") +
  scale_fill_gradient(low = "red", high = "green", na.value = "white") +
  labs(
    title = "Species abundance per zone",
    x = "Zone",
    y = "Species",
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14),
    axis.text.y = element_text(face = "italic"),
    legend.position = "none"
  )

df <- read_excel("Luna.xlsx", sheet = "Sheet14")

# Load libraries
library(readxl)
library(tidyverse)
library(vegan)

# Step 2: Clean and prep the data
# We'll aggregate abundance per species per zone
df_clean <- df %>%
  filter(!is.na(notes)) %>%
  mutate(zone = notes) %>%
  group_by(scientific, zone) %>%
  summarise(abundance = sum(number), .groups = "drop") %>%
  pivot_wider(names_from = zone, values_from = abundance, values_fill = 0)

# Step 3: Make the species names row names (vegan prefers this)
df_matrix <- df_clean %>%
  column_to_rownames(var = "scientific")

# Step 4: Compute distance matrix (e.g., Bray-Curtis)
dist_matrix <- vegdist(df_matrix, method = "bray")

# Step 5: Perform PCoA
pcoa_result <- cmdscale(dist_matrix, k = 2, eig = TRUE)

# Step 6: Plot the results
pcoa_df <- as.data.frame(pcoa_result$points)
pcoa_df$species <- rownames(pcoa_df)

ggplot(pcoa_df, aes(x = V1, y = V2, label = species)) +
  geom_point() +
  geom_text(vjust = -0.5, hjust = 0.5, size = 3) +
  labs(
    title = "PCoA of Species Abundance by Zone",
    x = "PCoA Axis 1",
    y = "PCoA Axis 2"
  ) +
  theme_minimal()

# Load libraries
library(readxl)
library(tidyverse)
library(vegan)


# Step 2: Prepare the data — zones as rows, species as columns
df_clean <- df %>%
  filter(!is.na(notes)) %>%
  group_by(notes, scientific) %>%
  summarise(abundance = sum(number), .groups = "drop") %>%
  pivot_wider(names_from = scientific, values_from = abundance, values_fill = 0)

# Step 3: Set zones as row names
df_matrix <- df_clean %>%
  column_to_rownames(var = "notes")

# Step 4: Compute distance matrix (e.g., Bray-Curtis)
dist_matrix <- vegdist(df_matrix, method = "bray")

# Step 5: Perform PCoA
pcoa_result <- cmdscale(dist_matrix, k = 2, eig = TRUE)

# Step 6: Plot
pcoa_df <- as.data.frame(pcoa_result$points)
pcoa_df$zone <- rownames(pcoa_df)

ggplot(pcoa_df, aes(x = V1, y = V2, label = zone)) +
  geom_point(size = 3, color = "steelblue") +
  geom_text(vjust = -0.5, size = 3) +
  labs(
    title = "PCoA of Zones Based on Species Composition",
    x = "PCoA Axis 1",
    y = "PCoA Axis 2"
  ) +
  theme_minimal()

df <- read_excel("Luna.xlsx", sheet = "Sheet15")

sex_summary <- df %>%
  group_by(sex) %>%
  summarise(total = sum(number), .groups = "drop")

ggplot(sex_summary, aes(x = sex, y = total, fill = sex)) +
  geom_col(width = 0.6) +
  scale_fill_manual(values = c("M" = "#1f77b4", "F" = "#ff7f0e", "U" = "#999999")) +
  labs(
    title = "Abundance per sex",
    x = "Sex",
    y = "Number of individuals"
  ) +
  theme_minimal()

library(readxl)
library(tidyverse)

df %>%
  group_by(date, sex) %>%
  summarise(total = sum(number), .groups = "drop") %>%
  ggplot(aes(x = date, y = total, fill = sex)) +
  geom_col(position = position_dodge(width = 0.7), width = 0.6) +
  scale_fill_manual(values = c("M" = "#1f77b4", "F" = "#ff7f0e", "U" = "#999999")) +
  labs(
    title = "Abundance per sex and date",
    x = "Date",
    y = "Number of individuals",
    fill = "Sex"
  ) +
  theme_minimal()

df %>%
  group_by(date, sex) %>%
  summarise(total = sum(number), .groups = "drop") %>%
  ggplot(aes(x = date, y = total)) +
  geom_col(aes(fill = sex), position = position_dodge(width = 0.7), width = 0.6) +
  scale_fill_manual(values = c("M" = "#1f77b4", "F" = "#ff7f0e", "U" = "#999999")) +
  labs(
    title = "Abundance per sex and date",
    x = "Date",
    y = "Number of individuals",
    fill = "Sex",
    color = "Sex"
  ) +
  facet_wrap(~ sex, scales = "free_y") +  # Create separate plots for each sex
  theme_minimal() +
  theme(
    legend.position = "none",  # Optionally remove legend for cleaner output
    axis.text.x = element_text(angle = 90, hjust = 1)  # Rotate x-axis labels by 90 degrees
  )

library(readxl)
library(tidyr)
library(dplyr)
library(ggplot2)

df <- read_excel("Luna.xlsx", sheet = "Sheet18")

# Rename the first column to 'dragonfly' (if needed)
colnames(df)[1] <- "dragonfly"

# Reshape from wide to long format
df_long <- df %>%
  pivot_longer(
    cols = -dragonfly,
    names_to = "plant",
    values_to = "count"
  ) %>%
  filter(!is.na(count) & count > 0)

# Create dot plot (like a checklist)
ggplot(df_long, aes(x = plant, y = dragonfly)) +
  geom_point(shape = 21, fill = "#1f77b4", size = 4, stroke = 0.5) +
  labs(
    title = "Odonata Species Associated with Plant Species",
    x = "Plant Species",
    y = "Odonata Species"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

library(readxl)
library(tidyverse)


# Rename first column to 'odonata' for clarity
df <- df %>%
  rename(odonata = 1)

# Convert to long format
df_long <- df %>%
  pivot_longer(
    cols = -odonata,
    names_to = "plant",
    values_to = "abundance"
  )

# Plot as heatmap-style tile plot
ggplot(df_long, aes(x = plant, y = fct_rev(odonata), fill = abundance)) +
  geom_tile(color = "white") +
  scale_fill_gradient(low = "blue", high = "red", na.value = "white") +
  labs(
    title = "Odonata Species Abundance per Plant Species",
    x = "Plant Species",
    y = "Odonata Species",
    fill = "Abundance"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14),
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.text.y = element_text(face = "italic", size = 8)
  )

library(readxl)
library(tidyverse)

# Rename first column to 'odonata'
df <- df %>%
  rename(odonata = 1)

# Convert to long format and remove NAs
df_long <- df %>%
  pivot_longer(
    cols = -odonata,
    names_to = "plant",
    values_to = "abundance"
  ) %>%
  filter(!is.na(abundance))

# Plot the tile map
ggplot(df_long, aes(x = plant, y = fct_rev(odonata), fill = abundance)) +
  geom_tile(color = "white") +
  scale_fill_gradient(low = "blue", high = "red") +
  labs(
    title = "Odonata Species Abundance per Plant Species",
    x = "Plant Species",
    y = "Odonata Species",
    fill = "Abundance"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14),
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.text.y = element_text(face = "italic", size = 8)
  )

df <- read_excel("Luna.xlsx", sheet = "Sheet20")

library(readxl)
library(tidyverse)
library(lubridate)


# Convert 'time' column to proper time format (assuming it's HH:MM:SS)
df <- df %>%
  mutate(time = as.POSIXct(time, format = "%H:%M:%S"))

ggplot(df, aes(x = time, y = number)) +
  geom_point(size = 2, alpha = 0.8, color = "#1f77b4") +
  geom_smooth(method = "loess", color = "#08306b", se = FALSE, size = 1.5) +
  scale_x_datetime(date_labels = "%H:%M") +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 5)) + 
  labs(
    title = "Abundance Over Time",
    x = "Time of Day",
    y = "Number of Individuals"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).