Analysis of microplastics in river mammals
# install.packages("pacman")
# writeLines(pacman::p_lib(), "~/Desktop/list_of_R_packages.csv") # to quickly back up packages
# remotes::install_github("ThinkR-open/remedy")
# remotes::install_github("luisDVA/annotater")
pacman::p_load(rio, tidyverse,janitor, data.table, here,
knitr,
kableExtra,
flextable,
writexl,
sf,
raster,
osmdata,
leaflet,
rnaturalearthdata,
rnaturalearth,
cowplot,
dunn.test,
ggthemes,
effsize,
rstatix,
stats,
viridis,
RCzechia,
stringr,
scales,
coin,
httr,
terra,
czso,
grateful,
annotater,
officer
) # just add needed packages to this line and Pacman will install and load them.
#
# micro <- raw %>%
# dplyr::mutate(across(-1, ~as.numeric(.))) %>%
# dplyr::mutate(across(-1, ~replace_na(. , 0))) %>%
#
# #add country
#
# dplyr::mutate(
# country = dplyr::case_when(
# grepl("^BC", sample) ~ "Spain_dec2022",
# grepl("^TA", sample) ~ "Spain_dec2022",
# grepl("^7_5", sample) ~ "Austria",
# grepl("^LLESP", sample) ~ "Spain_jun2022",
# grepl("^MA", sample) ~ "Spain_2023",
# TRUE ~ NA_character_
# )
# )
#
# # add country2
#
# micro2 <- micro %>%
# dplyr::left_join(cz,by="sample",relationship = "many-to-many") %>%
# dplyr::left_join(italy,by="sample",relationship = "many-to-many") %>%
# dplyr::mutate (country = coalesce(country, country.x, country.y)) %>%
# dplyr::select(-country.x, -country.y) %>%
# janitor::clean_names() %>%
# dplyr::mutate(country = as.factor(country))
#
#
# # Add new data
#
# # Reshape the data into long format
# newdata_long <- newdataraw %>%
# tidyr::pivot_longer(cols = !1, names_to = "sample", values_to = "Value") # Excluding the first column which might be the row names
#
# # Now, transpose the data into wide format, making each former row a column
# transposed_newdata <- newdata_long %>%
# tidyr::pivot_wider(names_from = V1, values_from = Value)
#
# #add country
#
# newdata <- transposed_newdata %>%
# dplyr::mutate(across(-1, ~as.numeric(.))) %>%
# dplyr::mutate(across(-1, ~replace_na(. , 0))) %>%
#
# dplyr::mutate(
# country = dplyr::case_when(
# grepl("^BC", sample) ~ "Spain_dec2022",
# grepl("^nrsI", sample) ~ "Netherlands",
# TRUE ~ NA_character_
# )
# )%>%
# janitor::clean_names()
#
# # Add rows
# micro3 <- bind_rows(micro2, newdata)
#
# # Translate
#
# eng <- c("sample", "fibers", "fragments", "spheres", "films", "pellets", "sponges", "rubbers", "types_items", "dry_weight", "country")
#
#
# micro4 <- micro3 %>%
# dplyr::rename(
# Fibers = fibras,
# Fragments = fragmentos,
# Spheres = esferas,
# Films = films,
# Pellets = pellets,
# Sponges = esponjas,
# Rubbers = gomas,
# ItemTypes = tipos_items,
# DryWeight = peso_seco,
# Country = country
# ) %>%
# janitor::clean_names() %>%
# dplyr::mutate(
# sample = as.factor(sample),
# country = as.factor(country)
# )
#
# summary(micro4)
#
# saveRDS(micro4, here::here("data", "micro4.rds"))
# micro4 <-readRDS(here::here("data", "micro4.rds"))
#
# # Finding all duplicated rows
# repeated_rows <- micro4 %>%
# dplyr::filter(duplicated(.) | duplicated(., fromLast = TRUE))
#
# repeated_rows
#
#
# # Remove duplicate rows, keeping only the unique ones
# micro5 <- micro4 %>%
# distinct()
#
# # Identify repeated sample names in 'micro5'
# repeated_sample_names <- micro5 %>%
# dplyr::group_by(sample) %>%
# dplyr::summarise(count = n()) %>%
# dplyr::filter(count > 1) %>%
# dplyr::pull(sample)
#
# # Filter 'micro5' for these repeated sample names to show full rows
# full_rows_repeated_samples <- micro5 %>%
# dplyr::filter(sample %in% repeated_sample_names)
#
#
# full_rows_repeated_samples
#
# write_xlsx(micro5, here::here("data", "micro5.xlsx"))
# summary(micro5)
#
# # Check repeated values
# # Max fragments 60 in one sample?
# # Max weight 81g?
# # Falta GEO
#
#
# # Boxplot for dry_weight
# ggplot(micro5, aes(y = dry_weight)) +
# geom_boxplot()
#
# # Any missing values?
# colSums(is.na(micro5))
#
Manual editing to raw2.csv (directly in excel)
raw <- rio::import(here::here("data", "raw2.csv"))
# newdataraw <- rio::import(here::here("data", "newdata.csv")) %>%
# slice(-n())
#
# italy <- rio::import(here::here("data", "italy.csv"))
# cz <- rio::import(here::here("data", "cz.csv"))
summary(raw)
## sample fibers fragments spheres
## Length:346 Min. : 1.000 Min. : 1 Min. :2
## Class :character 1st Qu.: 1.000 1st Qu.: 1 1st Qu.:2
## Mode :character Median : 1.000 Median : 1 Median :2
## Mean : 2.033 Mean : 7 Mean :2
## 3rd Qu.: 2.000 3rd Qu.: 1 3rd Qu.:2
## Max. :12.000 Max. :60 Max. :2
## NA's :286 NA's :336 NA's :345
## films pellets sponges rubbers item_types
## Min. :1.000 Mode:logical Min. :1.000 Mode:logical Min. :0.0000
## 1st Qu.:1.000 NA's:346 1st Qu.:1.000 NA's:346 1st Qu.:0.0000
## Median :1.000 Median :1.000 Median :0.0000
## Mean :1.364 Mean :1.333 Mean :0.2543
## 3rd Qu.:1.000 3rd Qu.:1.000 3rd Qu.:0.0000
## Max. :5.000 Max. :3.000 Max. :2.0000
## NA's :335 NA's :340
## dry_weight density (items/dry_weight) country
## Min. : 0.0900 Min. : 0.0000 Length:346
## 1st Qu.: 0.4125 1st Qu.: 0.0000 Class :character
## Median : 0.8400 Median : 0.0000 Mode :character
## Mean : 2.0121 Mean : 0.3664
## 3rd Qu.: 1.5725 3rd Qu.: 0.0000
## Max. :81.2600 Max. :10.0000
##
## geo_system lat lon
## Length:346 Min. :-1208878 Min. :-861924.0
## Class :character 1st Qu.:-1130945 1st Qu.:-653396.0
## Mode :character Median :-1050191 Median :-473460.0
## Mean : 426109 Mean :-259399.2
## 3rd Qu.: 53 3rd Qu.: 14.5
## Max. : 4849533 Max. : 725585.0
## NA's :9 NA's :9
data <- raw %>%
dplyr::mutate(pellets = as.numeric(pellets),
rubbers = as.numeric(rubbers),
sample = as.factor(sample),
country = as.factor(country),
geo_system = as.factor(geo_system)) %>%
dplyr::mutate_at(vars(fibers:rubbers), ~tidyr::replace_na(., 0)) %>%
dplyr::mutate(total_mp = fibers + fragments + spheres + films + sponges,
concentration = total_mp / dry_weight) %>%
dplyr::mutate(country = dplyr::recode(country, "Czech Republic" = "Czechia",
"The Netherlands" = "Netherlands"))
summary(data)
## sample fibers fragments spheres
## 5753/6 : 2 Min. : 0.0000 Min. : 0.0000 Min. :0.00000
## 5768/1 : 2 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.:0.00000
## 5943/1 : 2 Median : 0.0000 Median : 0.0000 Median :0.00000
## 5957/4 : 2 Mean : 0.3526 Mean : 0.2023 Mean :0.00578
## 6371/4 : 2 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.:0.00000
## 6374/3 : 2 Max. :12.0000 Max. :60.0000 Max. :2.00000
## (Other):334
## films pellets sponges rubbers item_types
## Min. :0.00000 Min. :0 Min. :0.00000 Min. :0 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0 1st Qu.:0.00000 1st Qu.:0 1st Qu.:0.0000
## Median :0.00000 Median :0 Median :0.00000 Median :0 Median :0.0000
## Mean :0.04335 Mean :0 Mean :0.02312 Mean :0 Mean :0.2543
## 3rd Qu.:0.00000 3rd Qu.:0 3rd Qu.:0.00000 3rd Qu.:0 3rd Qu.:0.0000
## Max. :5.00000 Max. :0 Max. :3.00000 Max. :0 Max. :2.0000
##
## dry_weight density (items/dry_weight) country
## Min. : 0.0900 Min. : 0.0000 Austria : 9
## 1st Qu.: 0.4125 1st Qu.: 0.0000 Czechia :196
## Median : 0.8400 Median : 0.0000 Italy : 39
## Mean : 2.0121 Mean : 0.3664 Spain : 82
## 3rd Qu.: 1.5725 3rd Qu.: 0.0000 Netherlands: 20
## Max. :81.2600 Max. :10.0000
##
## geo_system lat lon
## Not available : 9 Min. :-1208878 Min. :-861924.0
## S-JTSK :191 1st Qu.:-1130945 1st Qu.:-653396.0
## UTM ETRS89 HUSO 29: 4 Median :-1050191 Median :-473460.0
## UTM ETRS89 HUSO 30: 74 Mean : 426109 Mean :-259399.2
## WGS84, Google Map : 68 3rd Qu.: 53 3rd Qu.: 14.5
## Max. : 4849533 Max. : 725585.0
## NA's :9 NA's :9
## total_mp concentration
## Min. : 0.0000 Min. : 0.0000
## 1st Qu.: 0.0000 1st Qu.: 0.0000
## Median : 0.0000 Median : 0.0000
## Mean : 0.6272 Mean : 0.7206
## 3rd Qu.: 0.0000 3rd Qu.: 0.0000
## Max. :60.0000 Max. :78.9474
##
write_xlsx(data, here::here("data", "data_clean.xlsx"))
write_csv(data, here::here("data", "data_clean.csv"))
Here we filter the data for which we have coordinates and transform all of them to WGS84 16/10/24 - 4 from Spain and 5 from CZ are missing.
# Define a mapping of geo_system values to EPSG codes
geo_system_mapping <- tibble::tibble(
geo_system = c("UTM ETRS89 HUSO 29", "UTM ETRS89 HUSO 30", "S-JTSK", "WGS84"),
epsg = c(25829, 25830, 5514, 4326)
)
# Function to transform coordinates
transform_coords <- function(df, original_crs) {
# Skip if the subset is empty or CRS is not defined
if (nrow(df) == 0 || is.na(original_crs)) {
return(df)
}
# Create sf object and transform coordinates
df_sf <- sf::st_as_sf(df, coords = c("lon", "lat"), crs = original_crs, remove = FALSE)
df_sf_transformed <- sf::st_transform(df_sf, crs = 4326)
return(cbind(df, sf::st_coordinates(df_sf_transformed)[, 1:2]))
}
# Transform all coordinates to WGS 84
data_wgs84 <- data %>%
dplyr::mutate(geo_system = dplyr::recode(geo_system, `WGS84, Google Map` = "WGS84")) %>%
dplyr::filter(geo_system != "Not available", !is.na(lon), !is.na(lat)) %>%
dplyr::left_join(geo_system_mapping, by = "geo_system") %>%
dplyr::group_by(geo_system) %>%
dplyr::group_modify(~ transform_coords(.x, unique(.x$epsg)[1])) %>%
dplyr::ungroup() %>%
dplyr::select(-c(epsg, geo_system,lat,lon))
# Check the results
head(data_wgs84)
## # A tibble: 6 × 16
## sample fibers fragments spheres films pellets sponges rubbers item_types
## <fct> <int> <int> <int> <int> <dbl> <int> <dbl> <int>
## 1 5668/5 0 0 0 0 0 0 0 0
## 2 5669/2 0 0 0 0 0 0 0 0
## 3 5768/1 0 0 0 0 0 0 0 0
## 4 5943/1 0 0 0 0 0 0 0 0
## 5 5753/2 1 0 0 0 0 0 0 1
## 6 5753/4 0 0 0 0 0 0 0 0
## # ℹ 7 more variables: dry_weight <dbl>, `density (items/dry_weight)` <dbl>,
## # country <fct>, total_mp <int>, concentration <dbl>, X <dbl>, Y <dbl>
summary(data_wgs84)
## sample fibers fragments spheres
## 5753/6 : 2 Min. : 0.000 Min. : 0.0000 Min. :0.000000
## 5768/1 : 2 1st Qu.: 0.000 1st Qu.: 0.0000 1st Qu.:0.000000
## 5943/1 : 2 Median : 0.000 Median : 0.0000 Median :0.000000
## 5957/4 : 2 Mean : 0.362 Mean : 0.2047 Mean :0.005935
## 6371/4 : 2 3rd Qu.: 0.000 3rd Qu.: 0.0000 3rd Qu.:0.000000
## 6374/3 : 2 Max. :12.000 Max. :60.0000 Max. :2.000000
## (Other):325
## films pellets sponges rubbers item_types
## Min. :0.00000 Min. :0 Min. :0.0000 Min. :0 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0 1st Qu.:0.0000 1st Qu.:0 1st Qu.:0.0000
## Median :0.00000 Median :0 Median :0.0000 Median :0 Median :0.0000
## Mean :0.04154 Mean :0 Mean :0.0178 Mean :0 Mean :0.2493
## 3rd Qu.:0.00000 3rd Qu.:0 3rd Qu.:0.0000 3rd Qu.:0 3rd Qu.:0.0000
## Max. :5.00000 Max. :0 Max. :3.0000 Max. :0 Max. :2.0000
##
## dry_weight density (items/dry_weight) country
## Min. : 0.100 Min. : 0.0000 Austria : 9
## 1st Qu.: 0.410 1st Qu.: 0.0000 Czechia :191
## Median : 0.830 Median : 0.0000 Italy : 39
## Mean : 2.029 Mean : 0.3659 Spain : 78
## 3rd Qu.: 1.550 3rd Qu.: 0.0000 Netherlands: 20
## Max. :81.260 Max. :10.0000
##
## total_mp concentration X Y
## Min. : 0.0000 Min. : 0.0000 Min. :-7.935 Min. :36.58
## 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.: 6.695 1st Qu.:42.16
## Median : 0.0000 Median : 0.0000 Median :14.342 Median :48.98
## Mean : 0.6321 Mean : 0.7295 Mean :10.547 Mean :46.87
## 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.:16.885 3rd Qu.:49.70
## Max. :60.0000 Max. :78.9474 Max. :18.758 Max. :53.16
##
write_xlsx(data_wgs84, here::here("data", "data_wgs84.xlsx"))
Looks like for Italy we have just three rough areas for all the 39 samples.
# Create a leaflet map
map <- leaflet(data_wgs84) %>%
addTiles() %>% # Add default OpenStreetMap map tiles
addMarkers(lng = ~X, lat = ~Y, popup = ~as.character(sample))
# Show the map
map
# # In Italy we have no streams, just 3 areas
#
# italy_temp <- data_wgs84 %>%
# dplyr::filter(country == "Italy")
#
#
# # Create a leaflet map
# map2 <- leaflet(italy_temp) %>%
# addTiles() %>% # Add default OpenStreetMap map tiles
# addMarkers(lng = ~X, lat = ~Y, popup = ~as.character(sample))
#
# # Show the map
# map2
Fig. 1 - Location of sampling sites in Austria, Czechia, Italy, the Netherlands and Spain.
# Get world map data
world <- rnaturalearth::ne_countries(scale = "medium", returnclass = "sf")
# # Ensure the country names in your data match those in the world map data
# data_wgs84$country <- dplyr::recode(data_wgs84$country,
# "Czech Republic" = "Czechia",
# "The Netherlands" = "Netherlands")
# Define the countries with samples
countries_with_samples <- unique(data_wgs84$country)
# Create the main map
main_map <- ggplot() +
geom_sf(data = world, fill = "gray90", color = "white") +
geom_sf(data = world[world$name %in% countries_with_samples,], aes(fill = name), color = "white") +
geom_point(data = data_wgs84, aes(x = X, y = Y), color = "black", size = 1, alpha = 0.7) +
coord_sf(xlim = c(-10, 30), ylim = c(35, 55), expand = FALSE) +
scale_fill_brewer(palette = "Set2") +
theme_void() + # Removes axis lines, ticks, and labels
theme(legend.position = "bottom",
legend.title = element_blank(),
plot.title = element_text(size = 14, hjust = 0.5, face = "bold"),
panel.background = element_rect(fill = "gray98"),
panel.grid = element_blank()) # +
#labs(title = "Microplastic Sampling Sites in Europe")
# Print the map
print(main_map)
# Create an inset map of Europe
# inset_map <- ggplot2::ggplot() +
# ggplot2::geom_sf(data = world, fill = "gray90", color = "white") +
# ggplot2::geom_sf(data = world[world$name %in% countries_with_samples,],
# ggplot2::aes(fill = name), color = "white") +
# ggplot2::coord_sf(xlim = c(-25, 40), ylim = c(35, 70), expand = FALSE) +
# ggplot2::theme_void() +
# ggplot2::theme(legend.position = "none")
# Combine the main map and inset map
# final_map <- cowplot::ggdraw() +
# cowplot::draw_plot(main_map) +
# cowplot::draw_plot(inset_map, x = 0.65, y = 0.65, width = 0.3, height = 0.3)
# # Display the map
# print(final_map)
#
# # Save the map
# ggplot2::ggsave("europe_microplastic_sampling_map.png", final_map, width = 12, height = 10, dpi = 300)
Exploration: How do microplastics vary across the land in CZ?
# Filter data for Czechia
cz_data <- data_wgs84 %>%
dplyr::filter(country == "Czechia") %>%
dplyr::mutate(total_microplastics = fibers + fragments + spheres + films + pellets + sponges + rubbers) %>%
dplyr::mutate(density = as.numeric(`density (items/dry_weight)`)) %>%
sf::st_as_sf(coords = c("X", "Y"), crs = 4326)
# Get the rivers data for the Czech Republic
cz_rivers <- RCzechia::reky()
# Heatmap Czechia
rivers_cz <- ggplot2::ggplot(cz_data) +
ggplot2::geom_sf(data = cz_rivers, color = "blue", size = 0.5, alpha = 0.5) + # Add rivers layer
ggplot2::geom_sf(ggplot2::aes(size = total_microplastics, color = density), alpha = 0.7) +
ggplot2::scale_color_viridis_c(option = "magma", name = "Density") + # Color represents density
ggplot2::scale_size_continuous(name = "Total Items") + # Size represents total number of microplastics
ggplot2::theme_minimal() +
ggplot2::labs(title = "Distribution of Microplastics in Otter Spraints (Czechia)")
# Save the plot
ggplot2::ggsave(here::here("output", "rivers_cz.png"), width = 10, height = 6, dpi = 300)
There has to be an effect of sampling.
# Create a color palette function
pal <- leaflet::colorNumeric(
palette = viridis::viridis(256), # Use the viridis color scale
domain = cz_data$total_microplastics # The domain is the range of microplastics
)
# Create an interactive map with leaflet
cz_map <- cz_data %>%
leaflet::leaflet() %>%
leaflet::addTiles() %>% # Add default OpenStreetMap tiles
leaflet::addProviderTiles(providers$Esri.WorldTopoMap) %>% # Add a topographic basemap with rivers and ponds
leaflet::addCircleMarkers(
radius = ~sqrt(total_microplastics) * 3, # Marker size proportional to microplastics
color = ~pal(total_microplastics), # Use the color palette for the marker color
fillOpacity = 0.8,
label = ~paste("Microplastics:", total_microplastics),
popup = ~paste("Sample ID:", sample, "<br>",
"Fibers:", fibers, "<br>",
"Fragments:", fragments, "<br>",
"Spheres:", spheres, "<br>",
"Films:", films)
) %>%
leaflet::addLegend(
"bottomright",
pal = pal,
values = cz_data$total_microplastics,
title = "Microplastics",
opacity = 1
)
cz_map
Information for microplastics recovered from spraints collected in each country, including number of samples, abundance, type of microplastic, dry weight of each sample and average density.
# Summarise overall data
data_summary <- dplyr::summarise(
data,
`Samples (n)` = dplyr::n(),
Abundance = sum(fibers + fragments + spheres + films + pellets + sponges + rubbers),
SE = round(sd(fibers + fragments + spheres + films + pellets + sponges + rubbers) / sqrt(dplyr::n()), 2),
dplyr::across(c(fibers, fragments, spheres, films, pellets, sponges, rubbers, dry_weight), sum, .names = "sum_{col}"),
`Average Density` = round(mean(`density (items/dry_weight)`), 2)
) %>%
dplyr::select(where(~sum(.) != 0))
# Rename columns for Table1
data_table1 <- dplyr::rename(
data_summary,
Fibers = sum_fibers,
Fragments = sum_fragments,
Spheres = sum_spheres,
Films = sum_films,
Sponges = sum_sponges,
`Dry Weight (g)` = sum_dry_weight
)
# Summarise by country
data_summary2 <- dplyr::group_by(data, country) %>%
dplyr::summarise(
`Samples (n)` = dplyr::n(),
Abundance = sum(fibers + fragments + spheres + films + pellets + sponges + rubbers),
SE = round(sd(fibers + fragments + spheres + films + pellets + sponges + rubbers) / sqrt(dplyr::n()), 2),
dplyr::across(c(fibers, fragments, spheres, films, pellets, sponges, rubbers, dry_weight), sum, .names = "sum_{col}"),
`Average Density` = round(mean(`density (items/dry_weight)`), 2)
) %>%
dplyr::select(-starts_with("sum_pellets"), -starts_with("sum_rubbers"))
# Rename columns for Table2
data_table2 <- dplyr::rename(
data_summary2,
Country = country,
Fibers = sum_fibers,
Fragments = sum_fragments,
Spheres = sum_spheres,
Films = sum_films,
Sponges = sum_sponges,
`Dry Weight (g)` = sum_dry_weight
)
# Combine both tables
combined_table <- dplyr::bind_rows(data_table2, dplyr::mutate(data_table1, Country = "Total"))
# Display the table
combined_table %>%
kableExtra::kbl() %>%
kableExtra::kable_paper("striped", full_width = F) %>%
kableExtra::row_spec(nrow(combined_table), bold = TRUE)
| Country | Samples (n) | Abundance | SE | Fibers | Fragments | Spheres | Films | Sponges | Dry Weight (g) | Average Density |
|---|---|---|---|---|---|---|---|---|---|---|
| Austria | 9 | 60 | 6.67 | 0 | 60 | 0 | 0 | 0 | 8.08 | 0.15 |
| Czechia | 196 | 52 | 0.05 | 45 | 0 | 0 | 6 | 1 | 222.49 | 0.36 |
| Italy | 39 | 9 | 0.08 | 8 | 1 | 0 | 0 | 0 | 54.75 | 0.35 |
| Spain | 82 | 96 | 0.25 | 69 | 9 | 2 | 9 | 7 | 403.89 | 0.49 |
| Netherlands | 20 | 0 | 0.00 | 0 | 0 | 0 | 0 | 0 | 6.98 | 0.00 |
| Total | 346 | 217 | 0.19 | 122 | 70 | 2 | 15 | 8 | 696.19 | 0.37 |
# Format the table
formatted_table <- combined_table %>%
dplyr::mutate(dplyr::across(where(is.numeric), ~round(., 2))) %>%
dplyr::mutate(`Average Density` = sprintf("%.2f", `Average Density`))
# Create the flextable
ft <- flextable::flextable(formatted_table) %>%
flextable::theme_booktabs() %>%
flextable::autofit() %>%
flextable::align(align = "center", part = "all") %>%
flextable::align(j = 1, align = "left", part = "all") %>%
flextable::bold(part = "header") %>%
flextable::bold(i = nrow(formatted_table), part = "body") %>%
flextable::set_header_labels(
Country = "Country",
`Samples (n)` = "Samples (n)",
Abundance = "Abundance",
SE = "SE",
Fibers = "Fibers",
Fragments = "Fragments",
Spheres = "Spheres",
Films = "Films",
Sponges = "Sponges",
`Dry Weight (g)` = "Dry Weight (g)",
`Average Density` = "Average Density"
) %>%
flextable::add_footer_lines(
"Table 1: Summary of microplastic occurrences in Eurasian otter (Lutra lutra) spraints across different European countries. The table shows the number of samples analyzed, total abundance of microplastics, standard error (SE), counts of different microplastic types (fibers, fragments, spheres, films, sponges), total dry weight of all samples per country, and average density of microplastics per gram of dry weight for each country and overall."
) %>%
flextable::fontsize(size = 8, part = "all") %>%
flextable::padding(padding = 2, part = "all") %>%
flextable::border(border = officer::fp_border(color = "black", width = 1), part = "all")
# Set page layout to match STOTEN guidelines (assuming A4 page size)
ft <- flextable::set_table_properties(
ft,
layout = "autofit",
width = 1 # Width in inches, adjust as needed
)
# Save the table as a Word document in the output folder
flextable::save_as_docx(ft, path = here::here("output", "table1_microplastics_summary.docx"))
ft
Country | Samples (n) | Abundance | SE | Fibers | Fragments | Spheres | Films | Sponges | Dry Weight (g) | Average Density |
|---|---|---|---|---|---|---|---|---|---|---|
Austria | 9 | 60 | 6.67 | 0 | 60 | 0 | 0 | 0 | 8.08 | 0.15 |
Czechia | 196 | 52 | 0.05 | 45 | 0 | 0 | 6 | 1 | 222.49 | 0.36 |
Italy | 39 | 9 | 0.08 | 8 | 1 | 0 | 0 | 0 | 54.75 | 0.35 |
Spain | 82 | 96 | 0.25 | 69 | 9 | 2 | 9 | 7 | 403.89 | 0.49 |
Netherlands | 20 | 0 | 0.00 | 0 | 0 | 0 | 0 | 0 | 6.98 | 0.00 |
Total | 346 | 217 | 0.19 | 122 | 70 | 2 | 15 | 8 | 696.19 | 0.37 |
Table 1: Summary of microplastic occurrences in Eurasian otter (Lutra lutra) spraints across different European countries. The table shows the number of samples analyzed, total abundance of microplastics, standard error (SE), counts of different microplastic types (fibers, fragments, spheres, films, sponges), total dry weight of all samples per country, and average density of microplastics per gram of dry weight for each country and overall. | ||||||||||
# Calculate presence/absence and percentage of spraints with microplastics
presence_absence <- data %>%
dplyr::mutate(has_mp = dplyr::if_else(fibers + fragments + spheres + films + sponges > 0, 1, 0)) %>%
dplyr::summarise(
total_samples = dplyr::n(),
samples_with_mp = sum(has_mp),
percentage_with_mp = mean(has_mp) * 100
)
# Calculate abundance (MPs/spraint)
abundance_stats <- data %>%
dplyr::mutate(total_mp = fibers + fragments + spheres + films + sponges) %>%
dplyr::summarise(
mean_abundance = mean(total_mp),
se_abundance = stats::sd(total_mp) / sqrt(dplyr::n())
)
# Calculate concentration (MPs/g dry weight)
concentration_stats <- data %>%
dplyr::mutate(total_mp = fibers + fragments + spheres + films + sponges,
concentration = total_mp / dry_weight) %>%
dplyr::summarise(
mean_concentration = mean(concentration),
se_concentration = stats::sd(concentration) / sqrt(dplyr::n())
)
# Calculate proportions of particle types
particle_proportions <- data %>%
dplyr::summarise(dplyr::across(c(fibers, fragments, spheres, films, sponges), sum)) %>%
tidyr::pivot_longer(dplyr::everything(), names_to = "particle_type", values_to = "count") %>%
dplyr::mutate(proportion = count / sum(count) * 100) %>%
dplyr::arrange(dplyr::desc(proportion))
# Paragraph for results
report_text <- paste(
"We found microplastics in", round(presence_absence$percentage_with_mp, 1), "% of spraints, at a mean abundance of", round(abundance_stats$mean_abundance, 1), "±", round(abundance_stats$se_abundance, 1),
"microplastics (MPs)/spraint (mean ± SE),and a mean concentration of", round(concentration_stats$mean_concentration, 1), "±", round(concentration_stats$se_concentration, 1),
"MPs/g (dry weight).\n\n")
# Print or write the report text to a file
cat(report_text)
## We found microplastics in 22.8 % of spraints, at a mean abundance of 0.6 ± 0.2 microplastics (MPs)/spraint (mean ± SE),and a mean concentration of 0.7 ± 0.2 MPs/g (dry weight).
# Visual inspection using Q-Q plot
ggplot2::ggplot(data, aes(sample = concentration)) +
stat_qq() +
stat_qq_line() +
labs(title = "Q-Q Plot of Microplastic Concentrations",
x = "Theoretical Quantiles",
y = "Sample Quantiles")
# Shapiro-Wilk test for normality
shapiro_test <- stats::shapiro.test(data$concentration)
print(shapiro_test)
##
## Shapiro-Wilk normality test
##
## data: data$concentration
## W = 0.12528, p-value < 2.2e-16
# Attempt log transformation
data <- data %>%
dplyr::mutate(log_concentration = log(concentration))
# Q-Q plot for log-transformed data
ggplot2::ggplot(data, aes(sample = log_concentration)) +
stat_qq() +
stat_qq_line() +
labs(title = "Q-Q Plot of Log-Transformed Microplastic Concentrations",
x = "Theoretical Quantiles",
y = "Sample Quantiles")
Non-parametric
# Remove countries with no variation in microplastic concentration (Netherlands)
data_filtered <- data %>%
dplyr::group_by(country) %>%
dplyr::filter(stats::var(concentration) > 0) %>%
dplyr::ungroup()
# Kruskal-Wallis test with effect size
kruskal_result <- data_filtered %>%
rstatix::kruskal_test(concentration ~ country) %>%
rstatix::add_significance()
# Calculate effect size
effect_size <- data_filtered %>%
rstatix::kruskal_effsize(concentration ~ country)
# Pairwise Wilcoxon rank sum test with Bonferroni correction
pairwise_wilcox <- stats::pairwise.wilcox.test(data_filtered$concentration,
data_filtered$country,
p.adjust.method = "bonferroni")
# Calculate proportions of samples with microplastics
proportion_with_mp <- data %>%
dplyr::filter(country != "Netherlands") %>%
dplyr::group_by(country) %>%
dplyr::summarise(
total_samples = dplyr::n(),
samples_with_mp = sum(concentration > 0),
proportion = samples_with_mp / total_samples
)
kruskal_result
## # A tibble: 1 × 7
## .y. n statistic df p method p.signif
## <chr> <int> <dbl> <int> <dbl> <chr> <chr>
## 1 concentration 326 26.1 3 0.00000929 Kruskal-Wallis ****
effect_size
## # A tibble: 1 × 5
## .y. n effsize method magnitude
## * <chr> <int> <dbl> <chr> <ord>
## 1 concentration 326 0.0716 eta2[H] moderate
pairwise_wilcox
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: data_filtered$concentration and data_filtered$country
##
## Austria Czechia Italy
## Czechia 1.000 - -
## Italy 1.000 1.000 -
## Spain 0.620 5.6e-06 0.043
##
## P value adjustment method: bonferroni
proportion_with_mp
## # A tibble: 4 × 4
## country total_samples samples_with_mp proportion
## <fct> <int> <int> <dbl>
## 1 Austria 9 1 0.111
## 2 Czechia 196 32 0.163
## 3 Italy 39 8 0.205
## 4 Spain 82 38 0.463
# kruskal_result
# # A tibble: 1 × 7
# .y. n statistic df p method p.signif
# <chr> <int> <dbl> <int> <dbl> <chr> <chr>
# 1 concentration 326 26.1 3 0.00000929 Kruskal-Wallis ****
# > effect_size
# # A tibble: 1 × 5
# .y. n effsize method magnitude
# * <chr> <int> <dbl> <chr> <ord>
# 1 concentration 326 0.0716 eta2[H] moderate
# > pairwise_wilcox
#
# Pairwise comparisons using Wilcoxon rank sum test with continuity correction
#
# data: data_filtered$concentration and data_filtered$country
#
# Austria Czechia Italy
# Czechia 1.000 - -
# Italy 1.000 1.000 -
# Spain 0.620 5.6e-06 0.043
#
# P value adjustment method: bonferroni
# > proportion_with_mp
# # A tibble: 4 × 4
# country total_samples samples_with_mp proportion
# <fct> <int> <int> <dbl>
# 1 Austria 9 1 0.111
# 2 Czechia 196 32 0.163
# 3 Italy 39 8 0.205
# 4 Spain 82 38 0.463
# Results and Interpretations
#' Microplastic Concentration Differences Among Countries
#'
#' The Kruskal-Wallis test revealed significant differences in microplastic
#' concentrations among the studied countries (χ2(3) = 26.1, p < 0.0001).
#' The effect size (η2H = 0.0716) indicates a moderate practical significance,
#' suggesting that approximately 7.16% of the variability in microplastic
#' concentrations can be attributed to country differences.
#'
#' Pairwise comparisons using Wilcoxon rank sum tests showed that Spain had
#' significantly different microplastic concentrations compared to Czechia
#' (p < 0.0001) and Italy (p = 0.043). No significant differences were found
#' between other country pairs, due to limited sample sizes in Austria.
#'
#' The proportion of samples containing microplastics varied considerably
#' among countries: Spain (46.3%), Italy (20.5%), Czechia (16.3%), and
#' Austria (11.1%). This variation suggests differing levels of microplastic
#' pollution or detection across the studied European countries.
#'
# Visualize results with adjusted y-axis
ggplot2::ggplot(data_filtered, ggplot2::aes(x = country, y = concentration)) +
ggplot2::geom_boxplot(outlier.shape = NA) +
ggplot2::geom_jitter(width = 0.2, alpha = 0.5) +
ggplot2::labs(title = "Microplastic Concentrations by Country",
subtitle = paste("Kruskal-Wallis test p-value <", format(kruskal_result$p, scientific = TRUE, digits = 3)),
x = "",
y = "Concentration (MPs/g dry weight)") +
ggplot2::theme_minimal() +
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) +
ggplot2::coord_cartesian(ylim = c(0, 12.5)) +
ggplot2::annotate("text", x = 0.7, y = 12,
label = "One sample in Austria \n is out of scale (78.95)",
hjust = 0, vjust = 1, size = 3, color = "red")
# Save the plot
ggplot2::ggsave("microplastic_concentration_by_country_adjusted.png", width = 10, height = 6, dpi = 300)
# Visualize proportion of samples with microplastics
ggplot2::ggplot(proportion_with_mp, ggplot2::aes(x = country, y = proportion, fill = country)) +
ggplot2::geom_bar(stat = "identity") +
ggplot2::geom_text(ggplot2::aes(label = scales::percent(proportion, accuracy = 0.1)),
vjust = -0.5, size = 3.5) +
ggplot2::labs(title = "Proportion of Samples with Microplastics by Country",
x = "",
y = "Proportion of Samples") +
ggplot2::theme_minimal() +
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
legend.position = "none") +
ggplot2::scale_y_continuous(labels = scales::percent, limits = c(0, 1))
# Save the proportion plot
ggplot2::ggsave("proportion_samples_with_microplastics.png", width = 10, height = 6, dpi = 300)
# Filter for Czechia data and convert to sf object
cz_data <- data_wgs84 %>%
dplyr::filter(country == "Czechia") %>%
sf::st_as_sf(coords = c("X", "Y"), crs = 4326)
# Load Czechia administrative boundaries and population data
cz_districts <- RCzechia::okresy("low")
census_data <- czso::czso_get_table("SLDB-VYBER") %>%
dplyr::select(uzkod, obyvatel = vse1111) %>%
dplyr::mutate(obyvatel = as.numeric(obyvatel))
# Join population data to districts
cz_districts <- cz_districts %>%
dplyr::inner_join(census_data, by = c("KOD_OKRES" = "uzkod"))
# Calculate population density
cz_districts$pop_density <- cz_districts$obyvatel / (as.numeric(st_area(cz_districts)) / 1e6) # population per km2
# Find which district each microplastic sample belongs to
cz_data <- sf::st_join(cz_data, cz_districts)
# Calculate urbanization index based on population density
cz_data$urban_index <- scale(cz_data$pop_density)
# Statistical analysis
cz_model <- lm(concentration ~ urban_index, data = cz_data)
cz_summary <- summary(cz_model)
cz_correlation <- cor.test(cz_data$concentration, cz_data$urban_index)
# Visualization
create_map <- function(data) {
ggplot() +
geom_sf(data = cz_districts, aes(fill = pop_density)) +
geom_sf(data = data, aes(color = concentration, size = urban_index)) +
scale_fill_viridis_c(name = "Population Density", option = "C") +
scale_color_viridis_c(name = "Microplastic Concentration", option = "D") +
theme_minimal() +
labs(title = "Microplastic Concentration and Urbanization in Czechia",
size = "Urbanization Index")
}
create_scatter <- function(data) {
ggplot(data, aes(x = urban_index, y = concentration)) +
geom_point() +
geom_smooth(method = "lm") +
labs(title = "Microplastic Concentration vs. Urbanization in Czechia",
x = "Urbanization Index",
y = "Microplastic Concentration")
}
cz_map <- create_map(cz_data)
cz_scatter <- create_scatter(cz_data)
# not a good proxy with population, because sampling of course happened outside
Composition of microplastic types across different countries.Relative proportions of fibers, fragments, spheres, films, and sponges in each country.
str(combined_table)
## tibble [6 × 11] (S3: tbl_df/tbl/data.frame)
## $ Country : chr [1:6] "Austria" "Czechia" "Italy" "Spain" ...
## $ Samples (n) : int [1:6] 9 196 39 82 20 346
## $ Abundance : num [1:6] 60 52 9 96 0 217
## $ SE : num [1:6] 6.67 0.05 0.08 0.25 0 0.19
## $ Fibers : int [1:6] 0 45 8 69 0 122
## $ Fragments : int [1:6] 60 0 1 9 0 70
## $ Spheres : int [1:6] 0 0 0 2 0 2
## $ Films : int [1:6] 0 6 0 9 0 15
## $ Sponges : int [1:6] 0 1 0 7 0 8
## $ Dry Weight (g) : num [1:6] 8.08 222.49 54.75 403.89 6.98 ...
## $ Average Density: num [1:6] 0.15 0.36 0.35 0.49 0 0.37
# Reshape the data for plotting
otter_data_long <- combined_table %>%
dplyr::select(Country, `Samples (n)`, Fibers, Fragments, Spheres, Films, Sponges) %>%
tidyr::pivot_longer(cols = Fibers:Sponges, names_to = "Type", values_to = "Count")
# Convert Country to factor and reorder levels by Count (abundance)
otter_data_long$Country <- factor(otter_data_long$Country,
levels = c("Netherlands","Italy","Austria", "Czechia","Spain", "Total"))
ggplot(otter_data_long, aes(x = Country, y = Count, fill = Type)) +
geom_bar(stat = "identity") +
geom_text(data = combined_table, aes(x = Country, y = max(-15), label = `Samples (n)`),
vjust = -0.5, size = 3, inherit.aes = FALSE) +
labs(title = "Microplastics in Otter Spraints by Country",
x = NULL,
y = "Total Microplastic Abundance",
fill = "Microplastic Type",
caption = "Number of samples (n) shown left of bars") +
scale_fill_brewer(palette = "Set3") +
theme_light() +
theme(plot.title = element_text(size = 14, face = "bold"),
axis.title = element_text(size = 12),
axis.text.x = element_text(hjust = 1),
legend.title = element_text(size = 10),
legend.text = element_text(size = 9),
plot.caption = element_text(size = 9, hjust = 0))+
coord_flip()
# Calculate proportions of each microplastic type
data_proportions <- data %>%
dplyr::group_by(country) %>%
dplyr::summarise(dplyr::across(c(fibers, fragments, spheres, films, sponges), sum),
total = sum(fibers, fragments, spheres, films, sponges)) %>%
dplyr::mutate(dplyr::across(c(fibers, fragments, spheres, films, sponges),
~./total, .names = "prop_{.col}"))
# Reshape the data for plotting
data_long <- data_proportions %>%
tidyr::pivot_longer(cols = starts_with("prop_"),
names_to = "microplastic_type",
values_to = "proportion") %>%
dplyr::mutate(microplastic_type = stringr::str_remove(microplastic_type, "prop_")) %>%
dplyr::filter(country != "Netherlands")
# Create a stacked bar plot
ggplot2::ggplot(data_long, ggplot2::aes(x = country, y = proportion, fill = microplastic_type)) +
ggplot2::geom_bar(stat = "identity", position = "stack") +
ggplot2::scale_y_continuous(labels = scales::percent) +
ggplot2::scale_fill_brewer(palette = "Set3") +
ggplot2::labs(x = NULL, y = "Proportion", fill = "Microplastic Type") +
ggplot2::theme_minimal() +
ggplot2::coord_flip()
# Removed The Netherlands
# Composition of microplastic types across different countries.
# Relative proportions of fibers, fragments, spheres, films, and sponges in each country.
# Create boxplots total data
ggplot2::ggplot(data = data, ggplot2::aes(x = country, y = concentration, fill = country)) +
ggplot2::geom_boxplot() +
ggplot2::labs(title = "Microplastic Concentrations Across Countries",
x = "Country",
y = "Microplastic Concentration (MPs/g dry weight)",
fill = "Country") +
ggplot2::scale_fill_brewer(palette = "Set3") +
ggplot2::theme_minimal() +
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1))
# Filter out outliers with concentration > 30 MPs/g dry weight
data_no_outliers <- dplyr::filter(data, !(data$concentration > 30))
# Identify outliers for labeling
outliers <- dplyr::filter(data, data$concentration > 30)
# Create boxplots with outliers labeled
ggplot2::ggplot(data = data_no_outliers, ggplot2::aes(x = country, y = concentration, fill = country)) +
ggplot2::geom_boxplot() +
ggplot2::geom_text(data = outliers, ggplot2::aes(label = sample), x = outliers$country, y = 12,
hjust = -0.1, vjust = 0.5, color = "red", size = 3) +
ggplot2::ylim(0, 12) + # Limit y-axis to 12
ggplot2::labs(title = "Microplastic Concentrations Across Countries (Outliers Labeled)",
x = "Country",
y = "Microplastic Concentration (MPs/g dry weight)",
fill = "Country",
caption = "Outliers labeled in red") +
ggplot2::scale_fill_brewer(palette = "Set3") +
ggplot2::theme_minimal() +
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1))
cite_packages(out.format = "docx", out.dir = ".")
## | | | 0% | |......................................................................| 100%
## "C:/Program Files/RStudio/resources/app/bin/quarto/bin/tools/pandoc" +RTS -K512m -RTS grateful-report.knit.md --to docx --from markdown+autolink_bare_uris+tex_math_single_backslash --output pandoc10cc73430f2.docx --lua-filter "C:\RLibrary\rmarkdown\rmarkdown\lua\pagebreak.lua" --highlight-style tango --citeproc
## [1] "./grateful-report.docx"
cite_packages("paragraph", out.dir = ".")
We used R version 4.4.1 [@base] and the following R packages: annotater v. 0.2.3.9000 [@annotater], coin v. 1.4.3 [@coin2006; @coin2008], cowplot v. 1.1.3 [@cowplot], czso v. 0.4.1 [@czso], data.table v. 1.16.0 [@datatable], dunn.test v. 1.3.6 [@dunntest], effsize v. 0.8.1 [@effsize], flextable v. 0.9.6 [@flextable], ggthemes v. 5.1.0 [@ggthemes], here v. 1.0.1 [@here], janitor v. 2.2.0 [@janitor], kableExtra v. 1.4.0 [@kableExtra], knitr v. 1.48 [@knitr2014; @knitr2015; @knitr2024], leaflet v. 2.2.2 [@leaflet], officer v. 0.6.6 [@officer], osmdata v. 0.2.5 [@osmdata], pacman v. 0.5.1 [@pacman], raster v. 3.6.26 [@raster], RCzechia v. 1.12.2 [@RCzechia], rio v. 1.2.2 [@rio], rmarkdown v. 2.28 [@rmarkdown2018; @rmarkdown2020; @rmarkdown2024], rnaturalearth v. 1.0.1 [@rnaturalearth], rnaturalearthdata v. 1.0.0 [@rnaturalearthdata], rsconnect v. 1.3.1 [@rsconnect], rstatix v. 0.7.2 [@rstatix], scales v. 1.3.0 [@scales], sf v. 1.0.16 [@sf2018; @sf2023], terra v. 1.7.78 [@terra], tidyverse v. 2.0.0 [@tidyverse], viridis v. 0.6.5 [@viridis], writexl v. 1.5.0 [@writexl].
“We used R version 4.4.1 [@base] , dunn.test v. 1.3.6 [@dunntest], effsize v. 0.8.1 [@effsize], rstatix v. 0.7.2 [@rstatix], sf v. 1.0.16 [@sf2018; @sf2023], tidyverse v. 2.0.0 [@tidyverse]”
# # Convert sample data to sf object
# samples_sf <- sf::st_as_sf(data_wgs84, coords = c("X", "Y"), crs = 4326)
#
# # Filter for Czech Republic samples
# cz_samples <- samples_sf %>%
# dplyr::filter(country == "Czechia")
#
# # Get Czech Republic rivers and regions
# cz_rivers <- RCzechia::reky()
# cz_regions <- RCzechia::kraje()
#
# # Ensure the CRS matches
# cz_rivers <- sf::st_transform(cz_rivers, crs = sf::st_crs(cz_samples))
# cz_regions <- sf::st_transform(cz_regions, crs = sf::st_crs(cz_samples))
#
# # Find the nearest river for each sample
# nearest_rivers <- cz_rivers %>%
# sf::st_join(cz_samples, join = sf::st_nearest_feature)
#
# # Find the region for each sample
# samples_with_regions <- sf::st_join(cz_samples, cz_regions)
#
# # Create summary table
# cz_river_summary <- nearest_rivers %>%
# sf::st_drop_geometry() %>%
# dplyr::group_by(NAZEV) %>%
# dplyr::summarise(
# n_samples = sum(!is.na(sample))
# ) %>%
# dplyr::filter(n_samples > 0) %>%
# dplyr::arrange(dplyr::desc(n_samples))
#
# # Add region information
# cz_river_summary <- cz_river_summary %>%
# dplyr::left_join(
# samples_with_regions %>%
# sf::st_drop_geometry() %>%
# dplyr::group_by(sample) %>%
# dplyr::summarise(river = dplyr::first(NAZEV), region = dplyr::first(NAZEV.y)) %>%
# dplyr::group_by(river) %>%
# dplyr::summarise(region = dplyr::first(region)),
# by = c("NAZEV" = "river")
# )
#
# # Reorder columns
# cz_river_summary <- cz_river_summary %>%
# dplyr::select(n_samples, river = NAZEV, region)
#
# # Print summary
# print(cz_river_summary)
#
# # Save summary to CSV
# readr::write_csv(cz_river_summary, "czech_samples_per_river.csv")
Comparison of microplastic concentrations in otter (Lutra lutra) spraints between Spain (n = 82) and Czechia (n = 196). The boxplots display the median (central line), interquartile range (box), and range (whiskers) of microplastic concentrations. Individual data points are jittered to show the distribution. Microplastic concentration is expressed as the number of microplastic particles per gram of dry weight (MPs g^-1 dw).The Mann-Whitney U test revealed a significant difference in microplastic concentrations between the two countries (W = 10323, p = 9.27e-07), with Spain showing higher levels of contamination.
# # Filter data for Spain and Czechia
# data_czsp <- data %>%
# dplyr::filter(country %in% c("Spain", "Czechia")) %>%
# dplyr::mutate(concentration = as.numeric(concentration)) %>%
# dplyr::filter(!is.na(concentration))
#
#
# readr::write_csv(data_czsp, file = here::here("data", "data_czsp.csv"))
#
#
# # # Ensure data is properly formatted
# data_czsp <- data_czsp %>%
# dplyr::filter(!is.na(concentration), !is.na(country)) %>%
# dplyr::mutate(concentration = as.numeric(concentration))
#
# # Perform statistical analysis
# spain_conc <- data_czsp$concentration[data_czsp$country == "Spain"]
# czechia_conc <- data_czsp$concentration[data_czsp$country == "Czechia"]
# wilcox_result <- wilcox.test(spain_conc, czechia_conc)
#
#
# # Create the plot with statistical results
# plot <- ggplot2::ggplot(data_czsp, ggplot2::aes(x = country, y = concentration)) +
# ggplot2::geom_boxplot(ggplot2::aes(fill = country), width = 0.5, outlier.shape = NA) +
# ggplot2::geom_jitter(width = 0.2, alpha = 0.5) +
# ggthemes::scale_fill_colorblind() +
# ggplot2::labs(
# title = "Microplastic Concentrations in Otter Spraints",
# subtitle = paste0("Mann-Whitney U test: W = ", round(wilcox_result$statistic, 2),
# ", p = ", format.pval(wilcox_result$p.value, digits = 3), "\n"
# ),
# x = "",
# y = expression("Microplastic Concentration (MPs g"^-1*" dry weight)"),
# fill = "Country"
# ) +
# ggplot2::theme_minimal() +
# ggplot2::theme(
# plot.title = ggplot2::element_text(face = "bold", size = 14),
# plot.subtitle = ggplot2::element_text(size = 10),
# axis.title = ggplot2::element_text(size = 11),
# axis.text = ggplot2::element_text(size = 10),
# legend.position = "none"
# )
#
# print(plot)
#
# ggplot2::ggsave(here::here("output","figs", "microplastic_concentration_comparison.png"),
# plot = plot, width = 8, height = 6, dpi = 300)
#
# # Summary statistics
# summary_stats <- data_czsp %>%
# dplyr::group_by(country) %>%
# dplyr::summarize(
# mean = mean(concentration),
# median = median(concentration),
# sd = sd(concentration),
# n = n()
# )
#
# summary_stats
# str(data)
# head(data)
#
#
# # Filter data for the selected countries
# data_czsp <- data %>%
# dplyr::filter(country %in% c("Spain", "Czechia"))
#
# # Perform Kruskal-Wallis test to compare distributions of concentrations between countries
# kw_result <- stats::kruskal.test(concentration ~ country, data = data_czsp)
#
# kw_result
#
# #strong evidence to reject the null hypothesis that the distributions of microplastic concentrations are the same between these two countries.
#
# # Perform Dunn's test for pairwise comparisons with Bonferroni adjustment
# dunn_result <- dunn.test(data_czsp$concentration, data_czsp$country, method = "bonferroni")
#
# dunn_result
#
# # Microplastic concentrations are significantly lower in Spain compared to Czechia.
# # Calculate the proportion of each plastic type for each country
# plastic_proportions <- data_czsp %>%
# dplyr::group_by(country) %>%
# dplyr::summarise(dplyr::across(c(fibers, fragments, spheres, films), sum)) %>%
# dplyr::mutate(total = fibers + fragments + spheres + films) %>%
# dplyr::mutate(dplyr::across(c(fibers, fragments, spheres, films), ~./total, .names = "prop_{.col}"))
#
# # Reshape the data for plotting
# plastic_long <- plastic_proportions %>%
# tidyr::pivot_longer(cols = dplyr::starts_with("prop_"),
# names_to = "plastic_type",
# values_to = "proportion") %>%
# dplyr::mutate(plastic_type = stringr::str_remove(plastic_type, "prop_"))
#
# # Create a stacked bar plot
# ggplot2::ggplot(plastic_long, ggplot2::aes(x = country, y = proportion, fill = plastic_type)) +
# ggplot2::geom_bar(stat = "identity", position = "stack") +
# ggplot2::scale_y_continuous(labels = scales::percent) +
# ggplot2::scale_fill_brewer(palette = "Set3") +
# ggplot2::labs(title = "Proportion of Microplastic Types in Otter Spraints", x = "",
# y = "",
# fill = "Microplastic Type") +
# ggplot2::theme_minimal() +
# ggplot2::theme(legend.position = "right")
#
# ggplot2::ggsave("microplastic_types_comparison.png", width = 10, height = 6, dpi = 300)
#
# # Statistical comparison for each plastic type
# plastic_types <- c("fibers", "fragments", "spheres", "films")
#
# statistical_results <- lapply(plastic_types, function(type) {
# wilcox_test <- stats::wilcox.test(data_czsp[[type]] ~ data_czsp$country)
# data.frame(
# plastic_type = type,
# statistic = wilcox_test$statistic,
# p_value = wilcox_test$p.value
# )
# })
#
# statistical_results_df <- do.call(rbind, statistical_results)
#
# print(statistical_results_df)
#
# # Calculate effect sizes
# effect_sizes <- data_czsp %>%
# tidyr::gather(key = "plastic_type", value = "count", fibers, fragments, spheres, films) %>%
# dplyr::group_by(plastic_type) %>%
# rstatix::wilcox_effsize(count ~ country)
#
# print(effect_sizes)