Preamble

… some short words …

Needed libraries and data:

library(dplyr)
library(reshape2)
library(ggplot2)
attach("data/fjolst.RData")

Subsampling

… some short words …

Initial filtering and joining:

Selected years (posterior selection)

Issues:

  • The years in question apply to specific species and areas - this needs double-checking.
# Lookup table of the hand-picked sampling years: one row per
# year / species.id / season combination, all flagged `selected = TRUE`.
# It is joined onto the fish data further down to mark the chosen years.
# The species.id codes (1, 3, 5, 23) are labelled Cod, Saithe, Golden redfish
# and Plaice in the `species` lookup table later in this document.
years <- 
  # Species 1, feeding season
  expand.grid(year = c(1932,1945,1949,1961,1968,1971,1974,1979,1983,
                                 1986,1989,1992,1996,2000,2005,2010,2014,1927),
              species.id = 1,
              season = "feeding",
              stringsAsFactors = FALSE) %>%
  # Species 1, spawning season
  bind_rows(expand.grid(year = c(1932,1941, 1961,1968,1971,1979,1983,1986,1989,
                       1992,1996,2000,2005,2010,2014),
                        species.id = 1,
                        season = "spawning",
                        stringsAsFactors = FALSE)) %>%
  # Species 3, spawning season
  bind_rows(expand.grid(year = c(1971,1979,1989,1992,1996,2000,2005,2010,2014,1982),
                        species.id = 3,
                        season = "spawning",
                        stringsAsFactors = FALSE)) %>%
  # Species 5, spawning season
  bind_rows(expand.grid(year = c(1983,1986,1996,2000,2005,2010,2014,1980),
                        species.id = 5,
                        season = "spawning",
                        stringsAsFactors = FALSE)) %>%
  # Species 23, feeding season.
  # The third year was written as 1074, which can never match because the
  # station data is restricted to 1920-2014; assumed to be a typo for 1974
  # - TODO confirm against the sampling plan.
  bind_rows(expand.grid(year = c(1960,1971,1974,1996, 2000,2005,2010,2014),
                        species.id = 23,
                        season = "feeding",
                        stringsAsFactors = FALSE)) %>%
  mutate(selected = TRUE)

Subselection:

  • Select target species from the “Fish” table
  • Age has to be determined (more likely that otolith is archived)
  • Restrict by some broad length range (covers full target range for all species)
  • Join with selected fields in the “Station” table
  • Mark selected years
# Build the candidate set `d`:
#   1. keep the four target species with a recorded age and a length of 30-74,
#   2. attach the station fields (stations from 1920-2014 only),
#   3. derive the season from the month (months 3-5 -> "spawning",
#      everything else -> "feeding"),
#   4. flag the rows whose year/species/season is listed in `years`.
# Both joins rely on dplyr's natural join, i.e. they match on whatever
# column names the two tables share.
d <- Fish %>%
  filter(species.id %in% c(1, 3, 5, 23)) %>%
  filter(!is.na(age)) %>%
  filter(length %in% 30:74) %>%
  inner_join(
    Station %>%
      filter(year %in% 1920:2014) %>%
      select(synis.id, year, month, square, gear.id, sample.class)
  ) %>%
  mutate(season = ifelse(month %in% 3:5, "spawning", "feeding")) %>%
  left_join(years) %>%
  # Rows without a match in `years` come back as NA; treat them as not selected.
  mutate(selected = !is.na(selected) & selected)

Filter by stratification

Selected seasons and squares (species dependent): (NO LONGER NEEDED)

# Restrict `d` to the species-specific strata. A row is kept when its
# species, season, length class and statistical square match one of:
#   species  1: length 65-74; spawning in squares 370-372,
#                             feeding  in squares 673-674
#   species  3: spawning, length 65-74, squares 323-324
#   species  5: spawning, length 30-39, squares 323-324
#   species 23: feeding,  length 40-49, squares 623-624
d <- d %>%
  filter(
    (species.id == 1 & length %in% 65:74 &
       ((season == "spawning" & square %in% 370:372) |
          (season == "feeding" & square %in% 673:674))) |
      (species.id == 3 & season == "spawning" &
         length %in% 65:74 & square %in% 323:324) |
      (species.id == 5 & season == "spawning" &
         length %in% 30:39 & square %in% 323:324) |
      (species.id == 23 & season == "feeding" &
         length %in% 40:49 & square %in% 623:624)
  )

Overview for all species

# Overview plot: number of candidate fish per year, one panel per
# species/season combination, coloured by whether the year was selected.
d %>%
  group_by(year, selected, season, species.id) %>%
  # Count the rows in each group. The previous `n = 1` drew every point at
  # y = 1, which made the free y-scales below meaningless.
  summarise(n = n()) %>%
  ungroup() %>%
  ggplot(aes(year, n, colour = selected)) +
  geom_point() +
  # `scales` spelled out in full instead of relying on partial matching
  facet_wrap(~ species.id + season, scales = "free_y", ncol = 1) +
  scale_colour_brewer(palette = "Set1")

Overview by species for each year

# Lookup table translating the numeric species.id codes into species names.
# `tibble()` replaces the deprecated `data_frame()` constructor.
species <- tibble(species.id = c(1, 3, 5, 23),
                  species = c("Cod", "Saithe", "Golden redfish", "Plaice"))

# Number of candidate fish (column `n`) for every
# species / year / selected / season combination, returned ungrouped.
tmp <- count(ungroup(d), species.id, year, selected, season)

Cod

# Cod (species.id 1): candidate otoliths per year, one panel per season,
# coloured by whether the year was selected.
tmp %>%
  filter(species.id == 1) %>%
  ggplot(aes(year, n, colour = selected)) +
  # Grey reference line at 7, the number of fish drawn per
  # year/species/season in the sampling step
  geom_hline(yintercept = 7, colour = "grey") +
  geom_point() +
  theme_bw() +
  # `scales` spelled out in full instead of relying on partial matching
  facet_grid(season ~ ., scales = "free_y") +
  scale_colour_brewer(palette = "Set1") +
  theme(legend.position = c(0.2, 0.8)) +
  labs(x = NULL, 
       y = "Number of otoliths",
       title = "Cod",
       colour = "Years sampled") +
  # Same x-range and a y-axis starting at 0 for every species plot
  expand_limits(x = c(1925,2015), y = 0) +
  scale_x_continuous(breaks = seq(1930, 2015, by = 10))

Saithe

# Saithe (species.id 3): candidate otoliths per year, one panel per season,
# coloured by whether the year was selected.
tmp %>%
  filter(species.id == 3) %>%
  ggplot(aes(year, n, colour = selected)) +
  # Grey reference line at 7, the number of fish drawn per
  # year/species/season in the sampling step
  geom_hline(yintercept = 7, colour = "grey") +
  geom_point() +
  theme_bw() +
  # `scales` spelled out in full instead of relying on partial matching
  facet_grid(season ~ ., scales = "free_y") +
  scale_colour_brewer(palette = "Set1") +
  theme(legend.position = c(0.2, 0.8)) +
  labs(x = NULL, 
       y = "Number of otoliths",
       title = "Saithe",
       colour = "Years sampled") +
  # Same x-range and a y-axis starting at 0 for every species plot
  expand_limits(x = c(1925,2015), y = 0) +
  scale_x_continuous(breaks = seq(1930, 2015, by = 10))

Golden redfish

# Golden redfish (species.id 5): candidate otoliths per year, one panel per
# season, coloured by whether the year was selected.
tmp %>%
  filter(species.id == 5) %>%
  ggplot(aes(year, n, colour = selected)) +
  # Grey reference line at 7, the number of fish drawn per
  # year/species/season in the sampling step
  geom_hline(yintercept = 7, colour = "grey") +
  geom_point() +
  theme_bw() +
  # `scales` spelled out in full instead of relying on partial matching
  facet_grid(season ~ ., scales = "free_y") +
  scale_colour_brewer(palette = "Set1") +
  theme(legend.position = c(0.2, 0.8)) +
  labs(x = NULL, 
       y = "Number of otoliths",
       title = "Golden redfish",
       colour = "Years sampled") +
  # Same x-range and a y-axis starting at 0 for every species plot
  expand_limits(x = c(1925,2015), y = 0) +
  scale_x_continuous(breaks = seq(1930, 2015, by = 10))

Plaice

# Plaice (species.id 23): candidate otoliths per year, one panel per season,
# coloured by whether the year was selected.
tmp %>%
  filter(species.id == 23) %>%
  ggplot(aes(year, n, colour = selected)) +
  # Grey reference line at 7, the number of fish drawn per
  # year/species/season in the sampling step
  geom_hline(yintercept = 7, colour = "grey") +
  geom_point() +
  theme_bw() +
  # `scales` spelled out in full instead of relying on partial matching
  facet_grid(season ~ ., scales = "free_y") +
  scale_colour_brewer(palette = "Set1") +
  theme(legend.position = c(0.2, 0.8)) +
  labs(x = NULL, 
       y = "Number of otoliths",
       title = "Plaice",
       colour = "Years sampled") +
  # Same x-range and a y-axis starting at 0 for every species plot
  expand_limits(x = c(1925,2015), y = 0) +
  scale_x_continuous(breaks = seq(1930, 2015, by = 10))

Stuff not run yet

# Draw the otolith sample: 7 fish, without replacement, from every selected
# year / species / season combination, then write the result to disk.
# NOTE(review): the draw is random and no seed is set here, so each run
# produces a different sample.
samples <- 
  d %>% 
  filter(selected == TRUE) %>%
  select(species.id, year, season, synis.id, nr, age, length, square) %>%
  group_by(year, species.id, season) %>%
  # sample_n() stops with an error if a group holds fewer than 7 fish
  sample_n(size = 7, replace = FALSE) %>%
  ungroup() %>%
  arrange(year, season, species.id, synis.id, nr)
# Write the sampled rows. The original call passed `sample` (the base R
# function) instead of the `samples` data and misspelled the extension as ".cvs".
write.csv(samples, file = "data/sampled.csv")

# Show the drawn samples as an interactive table.
library(DT)
samples %>%
  datatable()