library(leaflet)
library(dplyr)
library(tidyr)
library(ggplot2)
library(DT)
library(scales) # install.packages("scales")
Ignore the warnings below and preview the DF with head().
# Address of the escapement CSV on the KNB; only read when the local copy
# cannot be loaded.
data_url <- "https://knb.ecoinformatics.org/knb/d1/mn/v2/object/urn%3Auuid%3Af119a05b-bbe7-4aea-93c6-85434dcb1c5e"
# Try the local file first. If reading it raises an error, say so and read
# the CSV from data_url instead; the handler's value becomes esc.
esc <- tryCatch(
  read.csv("data/escapement.csv"),
  error = function(read_error) {
    message("Escapement file does not seem to exist, so get it from the KNB.")
    read.csv(url(data_url, method = "libcurl"))
  }
)
## Warning in file(file, "rt"): cannot open file 'data/escapement.csv': No such
## file or directory
## Escapement file does not seem to exist, so get it from the KNB.
head(esc)
## Location SASAP.Region sampleDate Species DailyCount Method Latitude
## 1 Akalura Creek Kodiak 1930-05-24 Sockeye 4 Unknown 57.1641
## 2 Akalura Creek Kodiak 1930-05-25 Sockeye 10 Unknown 57.1641
## 3 Akalura Creek Kodiak 1930-05-26 Sockeye 0 Unknown 57.1641
## 4 Akalura Creek Kodiak 1930-05-27 Sockeye 0 Unknown 57.1641
## 5 Akalura Creek Kodiak 1930-05-28 Sockeye 0 Unknown 57.1641
## 6 Akalura Creek Kodiak 1930-05-29 Sockeye 0 Unknown 57.1641
## Longitude Source
## 1 -154.2287 ADFG
## 2 -154.2287 ADFG
## 3 -154.2287 ADFG
## 4 -154.2287 ADFG
## 5 -154.2287 ADFG
## 6 -154.2287 ADFG
Goal: What is the annual escapement by species and region?
Escapement = count (# fish that have returned to spawn and thus have
escaped perils along the way).
Below I will try to do this on my own and then compare with the
instructor’s code.

1. First, if we want annual info, we’ll need to extract the year from
the sampleDate column.
# Split sampleDate on "-" into separate year, month and day columns, then
# preview the first rows.
esc_yr <- separate(
  esc,
  col = sampleDate,
  into = c("year", "month", "day"),
  sep = "-"
)
head(esc_yr)
## Location SASAP.Region year month day Species DailyCount Method Latitude
## 1 Akalura Creek Kodiak 1930 05 24 Sockeye 4 Unknown 57.1641
## 2 Akalura Creek Kodiak 1930 05 25 Sockeye 10 Unknown 57.1641
## 3 Akalura Creek Kodiak 1930 05 26 Sockeye 0 Unknown 57.1641
## 4 Akalura Creek Kodiak 1930 05 27 Sockeye 0 Unknown 57.1641
## 5 Akalura Creek Kodiak 1930 05 28 Sockeye 0 Unknown 57.1641
## 6 Akalura Creek Kodiak 1930 05 29 Sockeye 0 Unknown 57.1641
## Longitude Source
## 1 -154.2287 ADFG
## 2 -154.2287 ADFG
## 3 -154.2287 ADFG
## 4 -154.2287 ADFG
## 5 -154.2287 ADFG
## 6 -154.2287 ADFG
Looks like the count data we need is an integer (int),
which is good. year is (chr), which might be problematic
later, but since we’re looking to calc annual esc, I suppose we will sum
esc for each year and so it will be ok to leave year as (chr) bc it’s
just a category for grouping.
# Keep only the four columns used for the annual totals, then preview them.
esc_analysis <- select(
  esc_yr,
  year, SASAP.Region, Species, DailyCount
)
head(esc_analysis)
## year SASAP.Region Species DailyCount
## 1 1930 Kodiak Sockeye 4
## 2 1930 Kodiak Sockeye 10
## 3 1930 Kodiak Sockeye 0
## 4 1930 Kodiak Sockeye 0
## 5 1930 Kodiak Sockeye 0
## 6 1930 Kodiak Sockeye 0
And it looks like the DF is already in tall format, so no need to pivot.
Tried to do 2 commands at once with the pipe… it worked!
# Sum DailyCount within every region / species / year combination.
ann_esc_reg_spp <- summarise(
  group_by(esc_analysis, SASAP.Region, Species, year),
  annual.catch = sum(DailyCount)
)
## `summarise()` has grouped output by 'SASAP.Region', 'Species'. You can override
## using the `.groups` argument.
head(ann_esc_reg_spp)
## # A tibble: 6 × 4
## # Groups: SASAP.Region, Species [1]
## SASAP.Region Species year annual.catch
## <chr> <chr> <chr> <int>
## 1 Alaska Peninsula and Aleutian Islands Chinook 1974 1092
## 2 Alaska Peninsula and Aleutian Islands Chinook 1975 1917
## 3 Alaska Peninsula and Aleutian Islands Chinook 1976 3045
## 4 Alaska Peninsula and Aleutian Islands Chinook 1977 4844
## 5 Alaska Peninsula and Aleutian Islands Chinook 1978 3901
## 6 Alaska Peninsula and Aleutian Islands Chinook 1979 10463
Well my try took 3 steps (1-3 above) and looks pretty similar to the
DF that the instructor created. However, they did a final
filter function after summarize and I’m not
sure why. They filtered by 5 species names.
# Count the distinct species names present in the annual summary.
num_unique_values <- length(unique(ann_esc_reg_spp[["Species"]]))
print(num_unique_values)
## [1] 21
I ran the code above to see how many diff spp there are in the DF. There are 21. So, I think they wanted to only look at those 5 spp, but that wasn’t in the question. Oh well, close enough.
Here’s their code all in one step (with the added filtering)
# Instructor's version in one pipeline: split the date, convert Year to a
# number, total DailyCount per species / region / year, and finally keep
# only the five listed species.
annual_esc <- esc %>%
  separate(
    col = sampleDate,
    into = c("Year", "Month", "Day"),
    sep = "-"
  ) %>%
  mutate(Year = as.numeric(Year)) %>%
  group_by(Species, SASAP.Region, Year) %>%
  summarise(escapement = sum(DailyCount)) %>%
  filter(
    Species %in% c("Chinook", "Sockeye", "Chum", "Coho", "Pink")
  )
## `summarise()` has grouped output by 'Species', 'SASAP.Region'. You can override
## using the `.groups` argument.
head(annual_esc)
## # A tibble: 6 × 4
## # Groups: Species, SASAP.Region [1]
## Species SASAP.Region Year escapement
## <chr> <chr> <dbl> <int>
## 1 Chinook Alaska Peninsula and Aleutian Islands 1974 1092
## 2 Chinook Alaska Peninsula and Aleutian Islands 1975 1917
## 3 Chinook Alaska Peninsula and Aleutian Islands 1976 3045
## 4 Chinook Alaska Peninsula and Aleutian Islands 1977 4844
## 5 Chinook Alaska Peninsula and Aleutian Islands 1978 3901
## 6 Chinook Alaska Peninsula and Aleutian Islands 1979 10463
I’m going to re-write my 3-step code into a single pipe and see if I get the same DF
# My three steps chained into one pipeline: split the date, keep the four
# analysis columns, then total DailyCount per region / species / year.
try2 <- esc %>%
  separate(
    col = sampleDate,
    into = c("year", "month", "day"),
    sep = "-"
  ) %>%
  select(year, SASAP.Region, Species, DailyCount) %>%
  group_by(SASAP.Region, Species, year) %>%
  summarise(annual.catch = sum(DailyCount))
## `summarise()` has grouped output by 'SASAP.Region', 'Species'. You can override
## using the `.groups` argument.
head(try2)
## # A tibble: 6 × 4
## # Groups: SASAP.Region, Species [1]
## SASAP.Region Species year annual.catch
## <chr> <chr> <chr> <int>
## 1 Alaska Peninsula and Aleutian Islands Chinook 1974 1092
## 2 Alaska Peninsula and Aleutian Islands Chinook 1975 1917
## 3 Alaska Peninsula and Aleutian Islands Chinook 1976 3045
## 4 Alaska Peninsula and Aleutian Islands Chinook 1977 4844
## 5 Alaska Peninsula and Aleutian Islands Chinook 1978 3901
## 6 Alaska Peninsula and Aleutian Islands Chinook 1979 10463
It worked! Yay for pipes!
Using ggplot2.
A ggplot uses an aesthetic mapping, aes(), and a geometry
geom_*() layer (or multiple layers).
This first plot is with the instructor’s DF
# Column chart of escapement by species, using the instructor's data frame.
ggplot(
  data = annual_esc,
  mapping = aes(x = Species, y = escapement)
) +
  geom_col()
Now here is mine
# Same column chart built from my own summary, which still has every species.
ggplot(
  data = try2,
  mapping = aes(x = Species, y = annual.catch)
) +
  geom_col()
lol now I understand why they filtered out just a few spp, bc the graph is too crowded.
I’ll use the instructor’s DF so I can C/P their code for the graphs.
So many graphing options with ggplot, most of these I am already familiar with.
# Escapement by species again, with the columns filled by region.
ggplot(
  data = annual_esc,
  mapping = aes(x = Species, y = escapement, fill = SASAP.Region)
) +
  geom_col()
They filtered again, this time for region and then made a line
graph.
They also used theme_bw to make the background white &
change grid color.
# Keep only the Kodiak rows, then draw escapement over time with lines and
# points colored by species on a black-and-white theme.
kodiak_esc <- filter(annual_esc, SASAP.Region == "Kodiak")
ggplot(
  data = kodiak_esc,
  mapping = aes(x = Year, y = escapement, color = Species)
) +
  geom_line() +
  geom_point() +
  labs(y = "Escapement", title = "Kodiak Salmon Escapement") +
  theme_bw()
This I didn’t know before, you can save the results of a series of
theme functions to an object ex: my_theme and
use it on multiple plots to prevent over C/P.
# Reusable theme: theme_bw() with the legend at the bottom and no legend title.
my_theme <- theme_bw() +
  theme(
    legend.position = "bottom",
    legend.title = element_blank()
  )
# Kodiak line plot again, this time styled with the saved theme.
ggplot(
  data = kodiak_esc,
  mapping = aes(x = Year, y = escapement, color = Species)
) +
  geom_line() +
  geom_point() +
  labs(y = "Escapement", title = "Kodiak Salmon Escapement") +
  my_theme
Fix the axis scales with scales package and
labels argument.
# Kodiak line plot with the y-axis labels formatted by comma() from scales.
ggplot(
  data = kodiak_esc,
  mapping = aes(x = Year, y = escapement, color = Species)
) +
  geom_line() +
  geom_point() +
  scale_y_continuous(labels = comma) +
  labs(y = "Escapement", title = "Kodiak Salmon Escapement") +
  my_theme
This is useful for saving the graph as a file, though prev I did it manually and that was fine.
# Write a 3 x 3 inch PNG; no plot is passed, so ggsave() uses its default plot.
ggsave(
  filename = "kodiak_esc.png",
  width = 3,
  height = 3,
  units = "in"
)
Use facet_wrap to generate mult plots - ex: one per
region
# One panel per region, laid out in two columns, each with its own y scale.
ggplot(
  data = annual_esc,
  mapping = aes(x = Year, y = escapement, color = Species)
) +
  geom_line() +
  geom_point() +
  scale_y_continuous(labels = comma) +
  facet_wrap(facets = ~SASAP.Region, ncol = 2, scales = "free_y") +
  labs(y = "Escapement") +
  my_theme
Using the DT package we will do some cool viz and publish to
GitHub!
Use distinct to get the unique locations and create an interactive
display with datatable.
# One row per unique Location / Latitude / Longitude combination; rows with
# a missing value in any of those three columns are dropped.
locations <- esc %>%
  distinct(Location, Latitude, Longitude) %>%
  drop_na()
# Show the unique locations as an interactive table.
datatable(locations)
This is really cool!
so we’ll use leaflet to create viz, but unlike ggplot it
uses (%>%) the pipe to connect functions rather than the (+).
# Map built from locations: default tiles plus one marker per row, with the
# popup text taken from the Location column.
leaflet(data = locations) %>%
  addTiles() %>%
  addMarkers(
    lng = ~Longitude,
    lat = ~Latitude,
    popup = ~Location
  )
This is awesome! So the addTiles function adds base
tiles to the map from OpenStreetMap.
addMarkers adds a marker at each location specified by the
lng/lat arguments from the DF. And the popup argument
creates a little box for each of the Locations in the DF.
The ~ symbol is formula notation: it tells leaflet to look up the name
after it (e.g. ~Longitude) as a column of the DF passed to leaflet(),
similar to the ~ used in facet_wrap in ggplot.
There are apparently lots of ways to customize, like in ggplot.
# Same locations drawn on the GEBCO WMS layer as small salmon-colored
# circles with a thin white outline; popup text comes from Location.
leaflet(locations) %>%
  addWMSTiles(
    "https://www.gebco.net/data_and_products/gebco_web_services/web_map_service/mapserv?",
    layers = "GEBCO_LATEST",
    attribution = "Imagery reproduced from the GEBCO_2014 Grid, version 20150318, www.gebco.net"
  ) %>%
  addCircleMarkers(
    lng = ~Longitude,
    lat = ~Latitude,
    popup = ~Location,
    radius = 5,
    # set fill properties
    fillColor = "salmon",
    fillOpacity = 1,
    # set stroke properties
    # TRUE rather than T: T is an ordinary variable that can be reassigned
    stroke = TRUE,
    weight = 0.5,
    color = "white",
    opacity = 1
  )
x