read in data and plot length vs. weight….

knitr::opts_chunk$set(echo = TRUE)

library(readxl) 
library(knitr)
library(lubridate) #manage date and time data
library(ggplot2) #make plots
library(tidyverse)
library(reshape2)
library(xlsx)

a<-"C:/Users/gbbuck/Documents"
setwd(a)

#'first read' ASL data
tmp <- read_excel("qryHeaderFish_join.xlsx")

#finalized ASL data
tmp2 <- read_excel("qryHeaderFish_join2.xlsx")
historic <- read_excel("qryAllFish2008_current.xlsx")
tmp <- tmp[,c(1:3,8,14:19)]
tmp2 <- tmp2[,c(1:3,8,14:19)]
names(tmp)<-c("FileName","SampleDate","gear","Subdistrict","FishNumber","Weight","Length","Sex","Gonad","Age")
names(tmp2)<-c("FileName","SampleDate","gear","Subdistrict","FishNumber","Weight","Length","Sex","Gonad","Age")

plot(tmp$Weight,tmp$Length,xlab = "weight(g)",ylab = "length(mm)")

Looks fine to me. Next boxplot weight by age to spot outliers that might need to be re-aged:

t <- tmp[(tmp$Age > 2 & tmp$Age < 13),]
p <- ggplot(t,aes(x = as.factor(Age), y = Weight)) + 
  geom_boxplot()

print(p)

Re-age outliers

After an iterative process of re-reading outliers until I get a standard deviation of weight at age roughly in line with historical dataset I end up here:

t <- tmp2[(tmp2$Age > 2 & tmp2$Age < 13),]

p <- ggplot(t,aes(x = as.factor(Age), y = Weight)) + 
  geom_boxplot()

print(p)

Comparing 2019 weight at age with historical (2008-2018) weight at age it looks like the major age’s of 2019 (5 & 6) are heavier than the historic mean but the standard deviation (SD) looks similar to the historic SD’s.

#build table of current and historic mean and SD
a <- aggregate(Weight~Age,data=historic,sd)
a <- a[(a$Age>3 & a$Age<15),]
names(a) <- c("Age","Historic.SD")

b <- aggregate(Weight~Age,data=tmp2,sd)
b <- b[(b$Age>3 & b$Age<15),]
names(b) <- c("Age","Current.SD")

tbl <- round(merge(a,b),digits = 2)

a <- aggregate(Weight~Age,data=historic,mean)
a <- a[(a$Age>3 & a$Age<15),]
names(a) <- c("Age","Historic.Mean")

b <- aggregate(Weight~Age,data=tmp2,mean)
b <- b[(b$Age>3 & b$Age<15),]
names(b) <- c("Age","Current.Mean")

c <- round(merge(a,b))
tbl <- merge(c,tbl)

kable(tbl)

Age	Historic.Mean	Current.Mean	Historic.SD	Current.SD
4	186	238	42.18	33.48
5	233	278	64.81	51.52
6	289	300	63.49	62.20
7	334	324	70.48	74.21
8	376	364	65.08	88.94
9	405	398	66.84	83.50
10	435	430	143.14	89.92
11	462	450	76.87	93.40
12	489	478	73.48	111.84
13	505	455	80.26	135.48
14	522	466	82.43	127.69

boxplot current and historic purse seine havest

Look at those fat little age-4 and age-5 herring! Looks like we have a nice little recruitment going on here.

tmp2$group <- "current"
historic$group <- "historic"

a <- tmp2[tmp2$gear==5,c("Age","Weight","group")]
b <- historic[historic$gear==5,c("Age","Weight","group")]
combined <- rbind(a,b)
combined <- subset(combined,(Weight>0 & Weight<1000))
combined <- subset(combined,Age<21)

  
ggplot(combined,aes(x=as.factor(Age),y=Weight,fill=group))+
  geom_boxplot()

Age frequency table by gear

Purse Seine = 5 Gilnet = 4

t <- subset(tmp2,Age<21)
tbl <- table(t$gear,t$Age)
kable(tbl)

	3	4	5	6	7	8	9	10	11	12	13	14	15	17
4	0	1	17	49	38	48	38	26	19	6	3	0	0	0
5	3	77	846	891	483	378	240	192	133	51	14	6	1	1

ggplot(t,aes(x = Age)) +
  geom_bar() +
  facet_grid(gear~.,scales = "free_y")

So our 2019 ASL dataset consists of 3316 purse seine samples with viable ages and 245 gillnet samples with viable ages. We will apply the full gilllnet sample to the full gillnet harvest. The purse seine harvest might be usefully stratified. Here is a summary of our available purse seine data by date and subdistrict (21 = Hagemeister, 12 = Nunavachak)

t <- subset(tmp2,Age<21)
t <- subset(t,gear==5)

tbl <- table(t$SampleDate,t$Subdistrict)
kable(tbl)

	12	21
2019-04-16	169	0
2019-04-17	360	0
2019-04-18	350	0
2019-04-19	369	0
2019-04-20	343	338
2019-04-21	336	0
2019-04-23	355	0
2019-04-24	345	0
2019-04-25	351	0

Two quick observations:

Our peak aerial survey occurred on 26 May. Therefore 100% of out purse seine data occured pre-peak biomass. This is a known weakness of our current program.
We managed to get some westside (Hagemeister) and eastside (Nunavachak) from the same day–a goal I try to do each year to check for differences in the population structure east and west of Togiak Bay.

Lets examine samples through 4/20 (barplot and smoothed density plot)…

tt <- subset(t,SampleDate<"2019-04-20")
ggplot(tt,aes(x = as.factor(Age))) +
  geom_bar() +
  facet_grid(Subdistrict~SampleDate)

ggplot(tt,aes(x = Age)) +
  geom_density() +
  facet_grid(Subdistrict~SampleDate)

Now a look at samples from 4/21 onward….

tt <- subset(t,SampleDate>"2019-04-20")
ggplot(tt,aes(x = as.factor(Age))) +
  geom_bar() +
  facet_grid(Subdistrict~SampleDate)

ggplot(tt,aes(x = Age)) +
  geom_density() +
  facet_grid(Subdistrict~SampleDate)

Initial observations

After the first two days, the harvest is really dominated by age-5 and age-6 fish.
There doesn’t appear to be much difference between Hagemeister and Nunavachak harvest.
There is a tantilizing hint of older fish perhaps coming onto the scene in the final two samples (4/24 and 4/25).

Next, a frequency table of age at date and chi-square test of that table…

tbl <- table(t$SampleDate,t$Age)
kable(tbl)

	3	4	5	6	7	8	9	10	11	12	13	14	15	17
2019-04-16	0	2	17	18	39	25	21	21	15	8	3	0	0	0
2019-04-17	0	3	58	60	48	55	46	39	32	12	4	3	0	0
2019-04-18	0	3	55	89	54	50	38	31	22	6	1	1	0	0
2019-04-19	0	9	91	104	46	49	30	19	15	4	2	0	0	0
2019-04-20	2	10	182	191	105	78	46	38	18	8	2	1	0	0
2019-04-21	1	17	121	97	38	28	19	7	5	2	1	0	0	0
2019-04-23	0	11	117	114	60	23	14	9	5	2	0	0	0	0
2019-04-24	0	9	103	102	42	37	17	16	11	5	0	1	1	1
2019-04-25	0	13	102	116	51	33	9	12	10	4	1	0	0	0

chisq.test(tbl)

## 
##  Pearson's Chi-squared test
## 
## data:  tbl
## X-squared = 400.79, df = 104, p-value < 2.2e-16

Here’s a table of a chi-square test of each adjacent day….

d <- unique(t$SampleDate)
row.n <- length(d)-1

tbl <- data.frame(matrix(NA,nrow=row.n,ncol = 3))
names(tbl) <- c("Date1","Date2","p.value")

for (i in 1:row.n){
a <- t[t$SampleDate==d[i:i+1],]
f.tbl <- table(a$SampleDate,a$Age)
mod <- chisq.test(f.tbl)
c <- mod$p.value
tbl[i,1] <- d[i]
tbl[i,2] <- d[i+1]
tbl[i,3] <- c

}

tbl$Date1 <- as.POSIXct(tbl$Date1,origin = "1970-01-01")
tbl$Date2 <- as.POSIXct(tbl$Date2,origin = "1970-01-01")
tbl$p.value <- as.numeric(tbl$p.value)

tbl

##                 Date1               Date2       p.value
## 1 2019-04-15 16:00:00 2019-04-16 16:00:00  6.198203e-30
## 2 2019-04-16 16:00:00 2019-04-17 16:00:00  2.320585e-49
## 3 2019-04-17 16:00:00 2019-04-18 16:00:00  2.852863e-62
## 4 2019-04-18 16:00:00 2019-04-19 16:00:00 6.565364e-190
## 5 2019-04-19 16:00:00 2019-04-20 16:00:00 2.186282e-111
## 6 2019-04-20 16:00:00 2019-04-22 16:00:00  2.225385e-89
## 7 2019-04-22 16:00:00 2019-04-23 16:00:00 7.106590e-105
## 8 2019-04-23 16:00:00 2019-04-24 16:00:00  6.123697e-91

…..and I’ll leave it here for the moment. If Sherri/Sara want to take a swing at this before the our 5/30 meeting, I can send along the raw document and data, otherwise we can pick this back up after our next talk.

………..picking back up 9/4/19 with fish ticket data completed. There was only 205 tons of gillnet data and one sample (n=338) to apply to it. The purse seine harvest divides nicely into three relatively similar sized periods with group 1 defined as harvest through 4/18 from Nunavachuk and Togiak (6.9k ton harvest, n=879), harvest from 4/19 through 4/22 (7.1k ton harvest, n=1742) from Nunavachuk and Togiak and group three for harvest 4/23 onward (8.4k harvest, n=696) taken from Nunavachak and Togiak. We could certainly make more sample groups but other than the first day or two the age structure of the harvest changes very little. Age composition by sample group:

t <- tmp2 %>%
      filter(gear == 5) %>%
      mutate(group = 2) %>%
      filter(Age < 28)

t[t$SampleDate<ymd("2019-04-19"),"group"] <- 1
t[t$SampleDate>ymd("2019-04-23"),"group"] <- 3


ggplot(t,aes(x = Age)) +
  geom_bar() +
  facet_grid(group~.,scales = "free_y")

Purse seine groups

Sample size by age (row) and purse seine sample group

tbl <- table(t$Age,t$group)
kable(tbl)

	1	2	3
3	0	3	0
4	8	47	22
5	130	511	205
6	167	506	218
7	141	249	93
8	130	178	70
9	105	109	26
10	91	73	28
11	69	43	21
12	26	16	9
13	8	5	1
14	4	1	1
15	0	0	1
17	0	0	1
21	0	1	0

write.csv(tbl,"ps_freq.csv")

Mean weights

tbl2 <- aggregate(Weight ~ group + Age, data = t, mean)
tbl2 <- dcast(tbl2,Age ~ group)
kable(tbl2)

Age	1	2	3
3	NA	157.6667	NA
4	258.8750	239.3404	226.0909
5	305.0000	273.9472	265.3756
6	316.3114	292.3636	286.9541
7	346.3936	310.2731	298.7097
8	383.3769	346.4494	330.9857
9	410.4476	375.5321	395.4231
10	449.3516	412.6849	413.7500
11	474.2029	443.2558	397.7619
12	512.0769	443.9375	469.7778
13	523.8750	379.4000	268.0000
14	418.0000	654.0000	471.0000
15	NA	NA	299.0000
17	NA	NA	620.0000
21	NA	341.0000	NA

write.csv(tbl2,"ps_meanweight.csv")

Gillnet Samples

Gillnet sample frequency and mean weight at age….

t <- tmp2 %>%
      filter(gear == 5) %>%
      filter(Age < 28)

tbl <- table(t$Age)

tbl2 <- aggregate(Weight ~ Age, data = t, mean)
t <- cbind(tbl,tbl2)
t <- t[,c("Age","Freq","Weight")]
kable(t)

Age	Freq	Weight
3	3	157.6667
4	77	237.5844
5	846	276.6418
6	891	295.5286
7	483	318.5911
8	378	356.2857
9	240	392.9625
10	192	430.2188
11	133	452.1278
12	51	483.2353
13	14	454.0000
14	6	466.1667
15	1	299.0000
17	1	620.0000
21	1	341.0000

write.csv(t,"gn_samples.csv")

Biomass Summary

The 2019 Biomass is:

Peak Suvey: 155,972 tons
Pre-Peak Harvest: 21,851 tons
Post-Season Survey: 157 tons

equals:

2019 Biomass: 177,980 tons

For pre-peak harvest we apply all three pre-peak PS harvest groups (GN harvest is negligible). We apply PS group 3 to both surveys. A case could be made for making more purse seine groups but there is very little change over time in the age comp after the first couple of days plus there is a natural grouping of three periods in time in terms of the open areas of fishing.

2019 Togiak Herring ASL

Greg Buck

17 May 2019