library(plotly)
library(tidyverse)
library(knitr)
library(kableExtra)
library(ggformula)
# Load the combined ASL sample file from the project directory.
setwd("~/2020 Herring")
dat <- read.csv("combined.samples.csv")

# Working copy: keep records with age below 28 (rows with a missing age are
# dropped by subset()) and parse Catch.Date into Date class.
# NOTE(review): the reason for the 28 cutoff is not visible here -- presumably
# larger values are non-age codes; confirm against the data dictionary.
tmp <- dat %>%
  subset(age < 28) %>%
  mutate(Catch.Date = as.Date(Catch.Date))

In 2020, no gillnetters participated in the fishery and only one processor (Icicle) participated. Six 50 lb boxes of frozen herring were shipped to King Salmon. After aging the scales, outliers of weight at age were re-evaluated iteratively until I was satisfied with the results (Fig. 1). I ended up with 903 usable samples (Table 1). This is enough to form two sampling groups to apply to the harvest and aerial survey data. To decide where to draw the line in forming these groups, we first examine the ASL samples by catch date (Figs. 2 and 3, Table 2). We can also perform chi-square tests of two scenarios that would keep sample sizes roughly balanced (Table 3):

# Figure 1: interactive boxplot of weight, one box per age.
weight_age_plot <- gf_boxplot(weight ~ age, group = ~age, data = tmp)
ggplotly(weight_age_plot)

Figure 1. Weight at age of 2020 Togiak herring samples.

# Figure 2: interactive bar chart of sample counts by age, one panel per
# catch date.
age_bars <- ggplot(data = tmp, mapping = aes(x = age)) +
  geom_bar() +
  facet_wrap(vars(Catch.Date))
ggplotly(age_bars)

Figure 2. Barplot of age by catch date.

# Figure 3: overlaid, semi-transparent age density curves, one per catch date.
catch_date_density <- ggplot(data = tmp) +
  geom_density(
    mapping = aes(x = age, fill = factor(Catch.Date)),
    alpha = 0.6
  ) +
  labs(fill = "Catch Date")
catch_date_density

Figure 3. Density plot of age composition of ASL data by catch date.

Table 1. Togiak ASL samples by age.

# Table 1: mean weight, weight SD, and sample count for each age.
# aggregate() with a formula drops rows with a missing weight or age, while
# the count below comes from table() on age alone.
weight_mean <- aggregate(weight ~ age, data = tmp, FUN = mean)
weight_sd <- aggregate(weight ~ age, data = tmp, FUN = sd)
age_n <- as.data.frame(table(tmp$age))
names(age_n) <- c("age", "n")

tbl <- merge(weight_mean, weight_sd, by = "age")
names(tbl) <- c("age", "mean weight (g)", "weight SD")
tbl <- merge(tbl, age_n, by = "age")

# Format every column to 2 significant digits for display. An age with a
# single sample has no SD, so it shows as NA.
tbl <- format(tbl, digits = 2)

tbl %>%
  kable(align = "c") %>%
  column_spec(1:3, width = "3in") %>%
  kable_styling(c("bordered", "condensed"), full_width = FALSE)
age mean weight (g) weight SD n
3 149 20 12
4 175 17 188
5 206 32 179
6 260 42 195
7 283 45 161
8 304 52 74
9 350 61 48
10 395 36 18
11 432 82 16
12 438 38 7
13 441 45 2
14 510 78 2
15 605 NA 1

Table 2. Togiak ASL samples by catch date and sex.

# Table 2: sample counts by sex (rows) and catch date (columns), plus a
# Total row. table() leaves out records whose sex or Catch.Date is NA, so the
# Total row can sum to fewer fish than the full sample count.
sex_by_date <- table(tmp$sex, tmp$Catch.Date)

# NOTE(review): the row labels below are assigned by position, which assumes
# the first level of tmp$sex is male and the second is female -- confirm
# against the coding used in combined.samples.csv.
stopifnot(nrow(sex_by_date) == 2L)
tbl <- rbind(sex_by_date, colSums(sex_by_date))
rownames(tbl) <- c("Male", "Female", "Total")

# Style the row-name column plus one column per catch date.
tbl %>%
  kable(align = "c") %>%
  column_spec(seq_len(ncol(tbl) + 1L), width = "1in") %>%
  kable_styling(c("bordered", "condensed"), full_width = TRUE)
2020-05-04 2020-05-06 2020-05-08 2020-05-09 2020-05-10 2020-05-13
Male 160 43 83 51 50 87
Female 143 40 88 39 23 88
Total 303 83 171 90 73 175

Table 3. Chi-square tests of age composition between the two sampling groups under each scenario.

# Chi-square test of age composition between two catch-date groups.
#
# data:      data frame with an `age` column.
# in_group1: logical vector, one value per row of `data`; TRUE puts the row
#            in group 1, FALSE in group 2.
# Returns the two group sizes followed by the p-value of chisq.test() on the
# group-by-age contingency table.
#
# NOTE(review): some age classes hold only one or two fish, so chisq.test()
# may warn that the chi-squared approximation is inaccurate; consider
# simulate.p.value = TRUE or pooling the oldest ages.
age_comp_chisq <- function(data, in_group1) {
  grp <- ifelse(in_group1, 1, 2)
  c(table(grp), chisq.test(table(grp, data$age))$p.value)
}

# Scenario 1: group 1 is every catch before 2020-05-07.
row1 <- age_comp_chisq(tmp, tmp$Catch.Date < as.Date("2020-05-07"))

# Scenario 2: group 1 is every catch on or before 2020-05-08.
row2 <- age_comp_chisq(tmp, tmp$Catch.Date <= as.Date("2020-05-08"))

tbl <- as.data.frame(
  rbind(row1, row2),
  row.names = c("scenario 1", "scenario 2")
)
names(tbl) <- c("group1.n", "group2.n", "P-value")

tbl %>%
  kable(align = "c") %>%
  column_spec(1:4, width = "1in") %>%
  kable_styling(c("bordered", "condensed"), full_width = TRUE)
group1.n group2.n P-value
scenario 1 386 517 0.0000000
scenario 2 565 338 0.0006347