library(readxl) #For importing excel files.
library(tidyverse) #For all our data wrangling tools!
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(FSA) #for the removal function.
## ## FSA v0.9.5. See citation('FSA') if used in publication.
## ## Run fishR() for related website and fishR('IFAR') for related book.
library(ggplot2)
Use the following code to view the file directory in this project
list.files(path = ".", full.names = TRUE)
## [1] "./assingment.R"
## [2] "./Bergvattensjöbäcken.csv"
## [3] "./Bergvattensjöbäcken.xlsx"
## [4] "./Electrofishin Assignment 2 and 3.R"
## [5] "./Electrofishing Assignment Markdown.R"
## [6] "./Electrofishing Assignment Markdown.Rmd"
## [7] "./Electrofishing_Lab.html"
## [8] "./Electrofishing-Assignment-Markdown.html"
## [9] "./Electrofishing-Assignment-Markdown.Rmd"
## [10] "./Electrofishing.R"
## [11] "./Electrofishing.Rproj"
## [12] "./Task 1.xlsx"
## [13] "./Task 10.xlsx"
## [14] "./Task 3.xlsx"
## [15] "./Task 4.xlsx"
## [16] "./Task 5.xlsx"
## [17] "./Task 6.xlsx"
## [18] "./Task 7.xlsx"
## [19] "./Task 8.xlsx"
## [20] "./Task 9.xlsx"
In this Part 1, we create 3x4 histograms with Site as columns and Year as rows. We begin by reading the file in question.
my_file <- "Task 4.xlsx"
fishlengths <- read_excel(my_file, sheet="Fish lengths")
names(fishlengths)
## [1] "LAN" "HFLODOMR" "Stream" "Fishing site" "XKOORLOK"
## [6] "YKOORLOK" "Fishing date" "Species" "Length (mm)"
fishlengths <- fishlengths %>%
select(Site = `Fishing site`, Date = `Fishing date`, Species,
Length = `Length (mm)`) %>%
filter(Species == "Brown trout") %>%
mutate(Year = substr(Date,1,4))
fishlengths
## # A tibble: 709 × 5
## Site Date Species Length Year
## <chr> <dbl> <chr> <dbl> <chr>
## 1 Clear-cut 19960809 Brown trout 49 1996
## 2 Clear-cut 19960809 Brown trout 50 1996
## 3 Clear-cut 19960809 Brown trout 50 1996
## 4 Clear-cut 19960809 Brown trout 50 1996
## 5 Clear-cut 19960809 Brown trout 51 1996
## 6 Clear-cut 19960809 Brown trout 52 1996
## 7 Clear-cut 19960809 Brown trout 52 1996
## 8 Clear-cut 19960809 Brown trout 53 1996
## 9 Clear-cut 19960809 Brown trout 53 1996
## 10 Clear-cut 19960809 Brown trout 53 1996
## # ℹ 699 more rows
Now that the data is cleaned up, we can plot the fish lenghts data to get a histogram, by Site and Year.
ggplot(data = fishlengths, aes(Length, fill = Site)) +
geom_histogram(binwidth=5) +
theme_bw() + theme(legend.position = "none") +
facet_grid(Year ~ Site) +
ggtitle("Figure 1. Histogram of Fish Lengths") # Add the figure title
Fish lenghts histogram by site and year
The clear cut happened between 1996 and 1997, after the first removal survey was conducted in 1996. From Figure 1 above, we notice an decline in fish lengths, especially between 1998 and 1999 for the clear cut sections. On the other hand, we do notice a decline in fish lengths in 1998 for upstream and downstream sections, with an uptake in 1999. We would say that there is not enough data to really conclude that clear cut has an effect on fish lengths.
To proceed, we clean up the remaining of the Task 4 data and create a relation between the tabs within the spreadsheet. The tabs in question; Site_description and Fishing_results
Site_description <- read_excel(my_file, sheet="Site description")
names(Site_description)
## [1] "Stream" "XKOORVDR"
## [3] "YKOORVDR" "Fishing site"
## [5] "Altitude" "XKOORLOK"
## [7] "YKOORLOK" "Fishing date"
## [9] "Fabricate" "Power"
## [11] "VOLT" "ampere"
## [13] "Pulse" "Removals"
## [15] "METOD" "Stream witdth"
## [17] "fished length" "Fished width"
## [19] "AREA (m2)" "max depth"
## [21] "mean depth" "bottom topo"
## [23] "SUBSTR1" "SUBSTR2"
## [25] "SUBSTR3" "water level"
## [27] "Water velocity" "Water temp"
## [29] "Air temp" "Amount of water veg"
## [31] "Bottom vegetation" "Local environment"
## [33] "Dom tree 1" "Dom tree 2"
## [35] "Shadowing (%)" "Wood in water (n)"
## [37] "Wood in water (n/100m2)" "Migration hinder"
## [39] "Population type trout" "Lime influenced"
## [41] "Municiple" "Map"
## [43] "Year" "Month"
Site_description <- Site_description %>%
select(Stream, Site = `Fishing site`, Date = `Fishing date`, Fished_Area = `AREA (m2)`)
Site_description
## # A tibble: 12 × 4
## Stream Site Date Fished_Area
## <chr> <chr> <dbl> <dbl>
## 1 Gällarbäcken Clear-cut 19960809 524
## 2 Gällarbäcken Clear-cut 19970819 471
## 3 Gällarbäcken Clear-cut 19980816 535
## 4 Gällarbäcken Clear-cut 19990813 374
## 5 Gällarbäcken Down-stream clear-cut 19960810 466.
## 6 Gällarbäcken Down-stream clear-cut 19970820 477
## 7 Gällarbäcken Down-stream clear-cut 19980816 519.
## 8 Gällarbäcken Down-stream clear-cut 19990813 392.
## 9 Gällarbäcken Up-stream clear-cut 19960810 464
## 10 Gällarbäcken Up-stream clear-cut 19970820 475
## 11 Gällarbäcken Up-stream clear-cut 19980816 540
## 12 Gällarbäcken Up-stream clear-cut 19990813 410
fishingresults <- read_excel(my_file, sheet="Fishing result")
names(fishingresults)
## [1] "xkoorlok" "ykoorlok" "Stream" "Fishing site" "Fishing date"
## [6] "Species" "year class" "Remove1" "Remove2" "Remove3"
fishingresults <- fishingresults %>%
select(Stream, Site = `Fishing site`, Date = `Fishing date`, Species, Year_class = `year class`,
Remove1, Remove2, Remove3) %>%
filter(Species == "Brown trout") %>%
mutate(Year = substr(Date,1,4))
Next we combine columns from Fishing results tab to the Site description tab.
Your_task <- Site_description %>%
left_join(
fishingresults,
by = c("Stream", "Site", "Date"))
Your_task
## # A tibble: 24 × 10
## Stream Site Date Fished_Area Species Year_class Remove1 Remove2 Remove3
## <chr> <chr> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl>
## 1 Gällarbä… Clea… 2.00e7 524 Brown … >0+ 21 15 9
## 2 Gällarbä… Clea… 2.00e7 524 Brown … 0+ 12 8 3
## 3 Gällarbä… Clea… 2.00e7 471 Brown … >0+ 35 11 9
## 4 Gällarbä… Clea… 2.00e7 471 Brown … 0+ 23 22 6
## 5 Gällarbä… Clea… 2.00e7 535 Brown … >0+ 5 3 2
## 6 Gällarbä… Clea… 2.00e7 535 Brown … 0+ 2 2 0
## 7 Gällarbä… Clea… 2.00e7 374 Brown … >0+ 7 3 2
## 8 Gällarbä… Clea… 2.00e7 374 Brown … 0+ 3 0 2
## 9 Gällarbä… Down… 2.00e7 466. Brown … >0+ 25 19 11
## 10 Gällarbä… Down… 2.00e7 466. Brown … 0+ 20 14 8
## # ℹ 14 more rows
## # ℹ 1 more variable: Year <chr>
the removal fucntion can be applied to our data to estimates the population of fish using the removal function.
removal_result <- Your_task %>%
rowwise() %>%
mutate(res = list(removal(c(Remove1, Remove2, Remove3), just.ests = TRUE))) %>%
unnest_wider(col = res)
We add density (fish/100m^2) and its CI to the removal_result data frame.
removal_result <- removal_result %>%
mutate(Dens = 100 * No / Fished_Area,
Dens_LCI = 100 * No.LCI / Fished_Area,
Dens_UCI = 100 * No.UCI / Fished_Area)
We learned how to run the removal function, on one row at a time, using the following process.
one_site <- Your_task %>%
filter(Site == 'Clear-cut', Year_class == ">0+", Year == "1996")
est <- removal(catch = c(one_site$Remove1, one_site$Remove2, one_site$Remove3),
just.ests = TRUE)
fish_dens <- 100 * est["No"] / one_site$Fished_Area
Using the PRP we can expresses the average width of the confidence interval as a percentage of the estimate.
PRP <- function(N, LCI, UCI) {
return(50 * (UCI - LCI)/ N)
}
Once we have the PRP function defined, we can apply it to the removal_results data frame.
removal_result <- removal_result %>%
mutate(p_prp = PRP(p, p.LCI, p.UCI)) %>%
mutate(fish_caught = Remove1 + Remove2 + Remove3)
To present the data in a more visual way, we plot them in scatter plot with arrow bars.
We run a catchability plot, which tells us the probability of catching an individual during our sampling efforts.
ggplot(data = removal_result, aes(Year, p, colour=Year)) +
geom_point(size=4) +
geom_errorbar(aes(ymin = p.LCI, ymax = p.UCI,colour=Year), position = "dodge", width = 0.25) +
theme_bw() +
theme(axis.text.x = element_text(size=14),
axis.text.y = element_text(size=14),
strip.text = element_text(size=14)) +
facet_grid(rows = vars(Year_class), cols = vars(Site)) +
ggtitle("Figure 2a. Catchability for Sites and Years")
We run a density plot to estimate the density of individuals in a survey area.
ggplot(data = removal_result, aes(Year, Dens, colour = Year)) +
geom_point(size=4) +
geom_errorbar(aes(ymin = Dens_LCI, ymax = Dens_UCI, colour=Year),
position = "dodge", width = 0.25) +
theme_bw() +
theme(axis.text.x = element_text(size=14, angle = -90),
axis.text.y = element_text(size=14),
strip.text = element_text(size=14)) +
facet_grid(rows = vars(Year_class), cols = vars(Site))+
ggtitle("Figure 2b. Density for Sites and Years")
To recreate this process for all Task files, we begin by creating a function called slurp_xlsx to read and combine all the Task files together.
slurp_xlsx <- function(fnames, sheet = 1) {
res <- lapply(fnames, readxl::read_xlsx, sheet = sheet)
res <- dplyr::bind_rows(res)
return(res)
}
Use the following code to combine the relevant files into a vector.
all_files <- c("Task 1.xlsx", "Task 3.xlsx", "Task 10.xlsx","Task 4.xlsx", "Task 5.xlsx", "Task 6.xlsx", "Task 7.xlsx", "Task 8.xlsx", "Task 9.xlsx")
Since the files used, are stored in a folder containing other unrelevant files, the list.files()) function can not be used.
We use the following code with the new slurp function to combine all site description tasks from each Task file.
Site_description <- slurp_xlsx(all_files, sheet="Site description")
After that we clean the new sheet, selection and renaming the relevant columns.
Site_description <- Site_description %>%
select(Stream, Site = `Fishing site`, Date = `Fishing date`, Fished_Area = `AREA (m2)`)
unique(Site_description$Stream)
## [1] "Bergvattensjöbäcken" "Gulån" "Östra Bjurbäcken"
## [4] "Gällarbäcken" "Mörttjärnsbäcken" "Rötjärnsbäcken"
## [7] "Svartbäcken" "Vallsbäcken" "Västra Dalkarlsån"
We rerun the above process for the fishing_results sheets.
fishingresults <- slurp_xlsx(all_files, sheet="Fishing result")
We clean fishing results with a filter() to save only “Brown trout”, keep only relevant columns. We also add a Year column.
fishingresults <- fishingresults %>%
select(Stream, Site = `Fishing site`, Date = `Fishing date`, Species, Year_class = `year class`,
Remove1, Remove2, Remove3) %>%
filter(Species == "Brown trout") %>%
mutate(Year = substr(Date,1,4))
unique(fishingresults$Stream) #View unique streams for the dataset
## [1] "Bergvattensjöbäcken" "Gulån" "Östra Bjurbäcken"
## [4] "Gällarbäcken" "Mörttjärnsbäcken" "Rötjärnsbäcken"
## [7] "Svartbäcken" "Vallsbäcken" "Västra Dalkarlsån"
Explanation for the content of this code, review the code from part 1 of the assignment
To get estimates we need to know the fished area available in Site_description. Therefore combine the two data frames into one using left_join()
total_data <- fishingresults %>%
left_join(Site_description,
by = c("Stream", "Site", "Date"))
total_data
## # A tibble: 216 × 10
## Stream Site Date Species Year_class Remove1 Remove2 Remove3 Year
## <chr> <chr> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <chr>
## 1 Bergvattensjöb… Clea… 2.00e7 Brown … >0+ 22 7 3 1996
## 2 Bergvattensjöb… Clea… 2.00e7 Brown … >0+ 22 15 5 1997
## 3 Bergvattensjöb… Clea… 2.00e7 Brown … >0+ 4 7 3 1998
## 4 Bergvattensjöb… Clea… 2.00e7 Brown … >0+ 25 13 8 1999
## 5 Bergvattensjöb… Down… 2.00e7 Brown … >0+ 11 4 2 1996
## 6 Bergvattensjöb… Down… 2.00e7 Brown … >0+ 21 11 2 1997
## 7 Bergvattensjöb… Down… 2.00e7 Brown … >0+ 8 4 0 1998
## 8 Bergvattensjöb… Down… 2.00e7 Brown … >0+ 12 4 7 1999
## 9 Bergvattensjöb… Up-s… 2.00e7 Brown … >0+ 15 7 4 1996
## 10 Bergvattensjöb… Up-s… 2.00e7 Brown … >0+ 24 10 3 1997
## # ℹ 206 more rows
## # ℹ 1 more variable: Fished_Area <dbl>
Using the removal function we can now calculate population estimates for the entire data set.
removal_result <- total_data %>%
rowwise() %>%
mutate(res = list(removal(c(Remove1, Remove2, Remove3), just.ests = TRUE))) %>%
unnest_wider(col = res)
We use the density formula (fish/100m^2) in combination with fished area and population estimate to get a density estimate.
removal_result <- removal_result %>%
mutate(Dens = 100 * No / Fished_Area,
Dens_LCI = 100 * No.LCI / Fished_Area,
Dens_UCI = 100 * No.UCI / Fished_Area)
Again, using the PRP we can expresses the average width of the confidence interval as a percentage of the estimate, for the entire data set this time.
PRP <- function(N, LCI, UCI) {
return(50 * (UCI - LCI)/ N)
}
Explanation for the content of this code, review the code from part 2 of the assignment
removal_result <- removal_result %>%
mutate(p_prp = PRP(p, p.LCI, p.UCI)) %>%
mutate(fish_caught = Remove1 + Remove2 + Remove3)
We now have a data frame with estimates from 9 different streams, each fished at three different sites and four years.
unique(removal_result$Stream) # Show streams
## [1] "Bergvattensjöbäcken" "Gulån" "Östra Bjurbäcken"
## [4] "Gällarbäcken" "Mörttjärnsbäcken" "Rötjärnsbäcken"
## [7] "Svartbäcken" "Vallsbäcken" "Västra Dalkarlsån"
unique(removal_result$Site) # Show site types
## [1] "Clear-cut" "Down-stream clear-cut" "Up-stream clear-cut"
unique(removal_result$Year) # Show years
## [1] "1996" "1997" "1998" "1999"
We create a summary with the mean density for all fishing in this study grouped by Site, Year and treatment. The overall summary is named total_summary.
total_summary <- removal_result %>%
group_by(Site, Year, Year_class) %>%
summarise(Dens_mean = mean(Dens),
Dens_LCI = Dens_mean - FSA::se(Dens),
Dens_UCI = Dens_mean + FSA::se(Dens),
.groups = "drop")
We create a figure that shows the development of the population at each treatment (Site) and separate plots for the two Year classes.
summary_plot <- ggplot(data = total_summary, mapping = aes(x = as.numeric(Year), y = Dens_mean)) +
geom_point(aes(colour = Year), size = 4) + # Add 'aes(colour = Year)' here
geom_errorbar(aes(ymin = Dens_LCI, ymax = Dens_UCI, colour = Year),
position = "dodge", width = 0.25) +
theme_bw() +
theme(axis.text.x = element_text(size = 14, angle = -90),
axis.text.y = element_text(size = 14),
strip.text = element_text(size = 14)) +
facet_grid(rows = vars(Year_class), cols = vars(Site)) +
ggtitle("Figure 3. Mean Density at Sites and Years")
summary_plot # Show the plot
We saved the figure with ggsave(“filename.png”, summary_plot) and interactively in Rstudio.
Here we analyse the results of Figure 3.
For the older trout (>0+), we notice a steady decline in population density at the clear cut site, from before the clear cut event, to 3 years after. In contrast, for the upstream and downstream sites, the population density can be seen rising immediately after the clear cut event, then declining the second year, with an uptake at the third year.
For the young of the year (0+), we see an uptake in population density immediately after the clear cut event, then declining the second year, with an uptake at the third year, for clear cut and upstream sites. But we can see an overall increase in population density at the downstream site.
Considering the scale of the removal survey, and the confidence interval limits displayed in Figure 2, and the randomness of the data, we conclude that this survey is inconclusive to determine the effect clear cutting had on Brown Trout. Though if did have to make a conclusion, we would say that clear cutting has a negative effect on adults, especially at clear cut sites, but a high likely hood of density bouncing back after a few years. And clear cutting has a positive effect on young, especially at down stream sites.
One of the assumptions of the removal method is that the number of
caught individuals get lower for each pass. The logical expression
(Remove1 > Remove2 & Remove2 > Remove3) will be
TRUE for all rows in removal_result that fulfill this criteria.
We use the removal_result dataset to find out how many times the result don’t fulfill the criteria.
nrow(removal_result)
## [1] 216
good_rows <- removal_result %>%
dplyr::filter(Remove1 > Remove2 & Remove2 > Remove3)
nrow(good_rows)
## [1] 130
To get a more precise data set, we could consider removing the rows that don’t fit the assumption. Prior to the survey, though, we could look at the removal method techniques and site selection to secure better results, without being bias. The assumption is that each removal will be less than the last during a survey, data which doesn’t fulfill that assumption, makes the data set uncertain and risks skewing the results.
Finally, we create figures that show how the total catch (fish_caught) affects PRP of catchabillity (p_prp in removal_result).
We decided to separate the PRP analysis, according to Year Class.
# Filter the dataset for year_class "0+"
removal_result_0 <- removal_result %>% filter(Year_class == "0+")
# Create the plot for year_class "0+"
PRP_plot_0 <- ggplot(data = removal_result_0, aes(x = fish_caught, y = p_prp)) +
geom_point(size = 3) +
geom_smooth(method = "lm", se = TRUE, color = "blue", size = 1.2, fullrange = TRUE) +
theme_bw() +
labs(x = "Total number of marked animals in previous catches",
y = "Catches of unmarked animals (PRP)",
title = "Figure 4a. Year Class 0+") +
theme(axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Print the plot for year_class "0+"
print(PRP_plot_0)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 27 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 27 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Filter the dataset for year_class ">0+"
removal_result_above_0 <- removal_result %>% filter(Year_class == ">0+")
# Create the plot for year_class ">0+"
PRP_plot_above_0 <- ggplot(data = removal_result_above_0, aes(x = fish_caught, y = p_prp)) +
geom_point(size = 3) +
geom_smooth(method = "lm", se = TRUE, color = "blue", size = 1.2, fullrange = TRUE) +
theme_bw() +
labs(x = "Total number of marked animals in previous catches",
y = "Catches of unmarked animals (PRP)",
title = "Figure 4b. Year Class >0+") +
theme(axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12))
# Print the plot for year_class ">0+"
print(PRP_plot_above_0)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 7 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_point()`).
For both year classes we have a decline regression line, which indicates that the assumption that consecutive catches are lower than the last, is true. In year class 0+, the regression line is not as steep as the one for year class >0+, that could be due to a smaller variation between each removal, or indication that some of the removals had higher catches than the previous ones.
A more thorough clean up of the data set could help with that, such as removing observations where catches were higher than the previous ones. It would also improve the visibility of the data set to create a PRP plot for each treatment type within each year class.
98 rows are below 50%.
84 rows are above 50%.
34 rows are NA due to no catches on some of the removals.