read in data and plot length vs. weight….
knitr::opts_chunk$set(echo = TRUE)
library(readxl)
library(knitr)
library(lubridate) #manage date and time data
library(ggplot2) #make plots
library(tidyverse)
library(reshape2)
library(xlsx)
a<-"C:/Users/gbbuck/Documents"
setwd(a)
#'first read' ASL data
tmp <- read_excel("qryHeaderFish_join.xlsx")
#finalized ASL data
tmp2 <- read_excel("qryHeaderFish_join2.xlsx")
historic <- read_excel("qryAllFish2008_current.xlsx")
tmp <- tmp[,c(1:3,8,14:19)]
tmp2 <- tmp2[,c(1:3,8,14:19)]
names(tmp)<-c("FileName","SampleDate","gear","Subdistrict","FishNumber","Weight","Length","Sex","Gonad","Age")
names(tmp2)<-c("FileName","SampleDate","gear","Subdistrict","FishNumber","Weight","Length","Sex","Gonad","Age")
plot(tmp$Weight,tmp$Length,xlab = "weight(g)",ylab = "length(mm)")
Looks fine to me. Next boxplot weight by age to spot outliers that might need to be re-aged:
t <- tmp[(tmp$Age > 2 & tmp$Age < 13),]
p <- ggplot(t,aes(x = as.factor(Age), y = Weight)) +
geom_boxplot()
print(p)
After an iterative process of re-reading outliers until I get a standard deviation of weight at age roughly in line with historical dataset I end up here:
t <- tmp2[(tmp2$Age > 2 & tmp2$Age < 13),]
p <- ggplot(t,aes(x = as.factor(Age), y = Weight)) +
geom_boxplot()
print(p)
Comparing 2019 weight at age with historical (2008-2018) weight at age it looks like the major age’s of 2019 (5 & 6) are heavier than the historic mean but the standard deviation (SD) looks similar to the historic SD’s.
#build table of current and historic mean and SD
a <- aggregate(Weight~Age,data=historic,sd)
a <- a[(a$Age>3 & a$Age<15),]
names(a) <- c("Age","Historic.SD")
b <- aggregate(Weight~Age,data=tmp2,sd)
b <- b[(b$Age>3 & b$Age<15),]
names(b) <- c("Age","Current.SD")
tbl <- round(merge(a,b),digits = 2)
a <- aggregate(Weight~Age,data=historic,mean)
a <- a[(a$Age>3 & a$Age<15),]
names(a) <- c("Age","Historic.Mean")
b <- aggregate(Weight~Age,data=tmp2,mean)
b <- b[(b$Age>3 & b$Age<15),]
names(b) <- c("Age","Current.Mean")
c <- round(merge(a,b))
tbl <- merge(c,tbl)
kable(tbl)
| Age | Historic.Mean | Current.Mean | Historic.SD | Current.SD |
|---|---|---|---|---|
| 4 | 186 | 238 | 42.18 | 33.48 |
| 5 | 233 | 278 | 64.81 | 51.52 |
| 6 | 289 | 300 | 63.49 | 62.20 |
| 7 | 334 | 324 | 70.48 | 74.21 |
| 8 | 376 | 364 | 65.08 | 88.94 |
| 9 | 405 | 398 | 66.84 | 83.50 |
| 10 | 435 | 430 | 143.14 | 89.92 |
| 11 | 462 | 450 | 76.87 | 93.40 |
| 12 | 489 | 478 | 73.48 | 111.84 |
| 13 | 505 | 455 | 80.26 | 135.48 |
| 14 | 522 | 466 | 82.43 | 127.69 |
Look at those fat little age-4 and age-5 herring! Looks like we have a nice little recruitment going on here.
tmp2$group <- "current"
historic$group <- "historic"
a <- tmp2[tmp2$gear==5,c("Age","Weight","group")]
b <- historic[historic$gear==5,c("Age","Weight","group")]
combined <- rbind(a,b)
combined <- subset(combined,(Weight>0 & Weight<1000))
combined <- subset(combined,Age<21)
ggplot(combined,aes(x=as.factor(Age),y=Weight,fill=group))+
geom_boxplot()
Purse Seine = 5 Gilnet = 4
t <- subset(tmp2,Age<21)
tbl <- table(t$gear,t$Age)
kable(tbl)
| 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 17 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 4 | 0 | 1 | 17 | 49 | 38 | 48 | 38 | 26 | 19 | 6 | 3 | 0 | 0 | 0 |
| 5 | 3 | 77 | 846 | 891 | 483 | 378 | 240 | 192 | 133 | 51 | 14 | 6 | 1 | 1 |
ggplot(t,aes(x = Age)) +
geom_bar() +
facet_grid(gear~.,scales = "free_y")
So our 2019 ASL dataset consists of 3316 purse seine samples with viable ages and 245 gillnet samples with viable ages. We will apply the full gilllnet sample to the full gillnet harvest. The purse seine harvest might be usefully stratified. Here is a summary of our available purse seine data by date and subdistrict (21 = Hagemeister, 12 = Nunavachak)
t <- subset(tmp2,Age<21)
t <- subset(t,gear==5)
tbl <- table(t$SampleDate,t$Subdistrict)
kable(tbl)
| 12 | 21 | |
|---|---|---|
| 2019-04-16 | 169 | 0 |
| 2019-04-17 | 360 | 0 |
| 2019-04-18 | 350 | 0 |
| 2019-04-19 | 369 | 0 |
| 2019-04-20 | 343 | 338 |
| 2019-04-21 | 336 | 0 |
| 2019-04-23 | 355 | 0 |
| 2019-04-24 | 345 | 0 |
| 2019-04-25 | 351 | 0 |
Two quick observations:
Our peak aerial survey occurred on 26 May. Therefore 100% of out purse seine data occured pre-peak biomass. This is a known weakness of our current program.
We managed to get some westside (Hagemeister) and eastside (Nunavachak) from the same day–a goal I try to do each year to check for differences in the population structure east and west of Togiak Bay.
Lets examine samples through 4/20 (barplot and smoothed density plot)…
tt <- subset(t,SampleDate<"2019-04-20")
ggplot(tt,aes(x = as.factor(Age))) +
geom_bar() +
facet_grid(Subdistrict~SampleDate)
ggplot(tt,aes(x = Age)) +
geom_density() +
facet_grid(Subdistrict~SampleDate)
Now a look at samples from 4/21 onward….
tt <- subset(t,SampleDate>"2019-04-20")
ggplot(tt,aes(x = as.factor(Age))) +
geom_bar() +
facet_grid(Subdistrict~SampleDate)
ggplot(tt,aes(x = Age)) +
geom_density() +
facet_grid(Subdistrict~SampleDate)
After the first two days, the harvest is really dominated by age-5 and age-6 fish.
There doesn’t appear to be much difference between Hagemeister and Nunavachak harvest.
There is a tantilizing hint of older fish perhaps coming onto the scene in the final two samples (4/24 and 4/25).
Next, a frequency table of age at date and chi-square test of that table…
tbl <- table(t$SampleDate,t$Age)
kable(tbl)
| 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 17 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2019-04-16 | 0 | 2 | 17 | 18 | 39 | 25 | 21 | 21 | 15 | 8 | 3 | 0 | 0 | 0 |
| 2019-04-17 | 0 | 3 | 58 | 60 | 48 | 55 | 46 | 39 | 32 | 12 | 4 | 3 | 0 | 0 |
| 2019-04-18 | 0 | 3 | 55 | 89 | 54 | 50 | 38 | 31 | 22 | 6 | 1 | 1 | 0 | 0 |
| 2019-04-19 | 0 | 9 | 91 | 104 | 46 | 49 | 30 | 19 | 15 | 4 | 2 | 0 | 0 | 0 |
| 2019-04-20 | 2 | 10 | 182 | 191 | 105 | 78 | 46 | 38 | 18 | 8 | 2 | 1 | 0 | 0 |
| 2019-04-21 | 1 | 17 | 121 | 97 | 38 | 28 | 19 | 7 | 5 | 2 | 1 | 0 | 0 | 0 |
| 2019-04-23 | 0 | 11 | 117 | 114 | 60 | 23 | 14 | 9 | 5 | 2 | 0 | 0 | 0 | 0 |
| 2019-04-24 | 0 | 9 | 103 | 102 | 42 | 37 | 17 | 16 | 11 | 5 | 0 | 1 | 1 | 1 |
| 2019-04-25 | 0 | 13 | 102 | 116 | 51 | 33 | 9 | 12 | 10 | 4 | 1 | 0 | 0 | 0 |
chisq.test(tbl)
##
## Pearson's Chi-squared test
##
## data: tbl
## X-squared = 400.79, df = 104, p-value < 2.2e-16
Here’s a table of a chi-square test of each adjacent day….
d <- unique(t$SampleDate)
row.n <- length(d)-1
tbl <- data.frame(matrix(NA,nrow=row.n,ncol = 3))
names(tbl) <- c("Date1","Date2","p.value")
for (i in 1:row.n){
a <- t[t$SampleDate==d[i:i+1],]
f.tbl <- table(a$SampleDate,a$Age)
mod <- chisq.test(f.tbl)
c <- mod$p.value
tbl[i,1] <- d[i]
tbl[i,2] <- d[i+1]
tbl[i,3] <- c
}
tbl$Date1 <- as.POSIXct(tbl$Date1,origin = "1970-01-01")
tbl$Date2 <- as.POSIXct(tbl$Date2,origin = "1970-01-01")
tbl$p.value <- as.numeric(tbl$p.value)
tbl
## Date1 Date2 p.value
## 1 2019-04-15 16:00:00 2019-04-16 16:00:00 6.198203e-30
## 2 2019-04-16 16:00:00 2019-04-17 16:00:00 2.320585e-49
## 3 2019-04-17 16:00:00 2019-04-18 16:00:00 2.852863e-62
## 4 2019-04-18 16:00:00 2019-04-19 16:00:00 6.565364e-190
## 5 2019-04-19 16:00:00 2019-04-20 16:00:00 2.186282e-111
## 6 2019-04-20 16:00:00 2019-04-22 16:00:00 2.225385e-89
## 7 2019-04-22 16:00:00 2019-04-23 16:00:00 7.106590e-105
## 8 2019-04-23 16:00:00 2019-04-24 16:00:00 6.123697e-91
…..and I’ll leave it here for the moment. If Sherri/Sara want to take a swing at this before the our 5/30 meeting, I can send along the raw document and data, otherwise we can pick this back up after our next talk.
………..picking back up 9/4/19 with fish ticket data completed. There was only 205 tons of gillnet data and one sample (n=338) to apply to it. The purse seine harvest divides nicely into three relatively similar sized periods with group 1 defined as harvest through 4/18 from Nunavachuk and Togiak (6.9k ton harvest, n=879), harvest from 4/19 through 4/22 (7.1k ton harvest, n=1742) from Nunavachuk and Togiak and group three for harvest 4/23 onward (8.4k harvest, n=696) taken from Nunavachak and Togiak. We could certainly make more sample groups but other than the first day or two the age structure of the harvest changes very little. Age composition by sample group:
t <- tmp2 %>%
filter(gear == 5) %>%
mutate(group = 2) %>%
filter(Age < 28)
t[t$SampleDate<ymd("2019-04-19"),"group"] <- 1
t[t$SampleDate>ymd("2019-04-23"),"group"] <- 3
ggplot(t,aes(x = Age)) +
geom_bar() +
facet_grid(group~.,scales = "free_y")
Sample size by age (row) and purse seine sample group
tbl <- table(t$Age,t$group)
kable(tbl)
| 1 | 2 | 3 | |
|---|---|---|---|
| 3 | 0 | 3 | 0 |
| 4 | 8 | 47 | 22 |
| 5 | 130 | 511 | 205 |
| 6 | 167 | 506 | 218 |
| 7 | 141 | 249 | 93 |
| 8 | 130 | 178 | 70 |
| 9 | 105 | 109 | 26 |
| 10 | 91 | 73 | 28 |
| 11 | 69 | 43 | 21 |
| 12 | 26 | 16 | 9 |
| 13 | 8 | 5 | 1 |
| 14 | 4 | 1 | 1 |
| 15 | 0 | 0 | 1 |
| 17 | 0 | 0 | 1 |
| 21 | 0 | 1 | 0 |
write.csv(tbl,"ps_freq.csv")
Mean weights
tbl2 <- aggregate(Weight ~ group + Age, data = t, mean)
tbl2 <- dcast(tbl2,Age ~ group)
kable(tbl2)
| Age | 1 | 2 | 3 |
|---|---|---|---|
| 3 | NA | 157.6667 | NA |
| 4 | 258.8750 | 239.3404 | 226.0909 |
| 5 | 305.0000 | 273.9472 | 265.3756 |
| 6 | 316.3114 | 292.3636 | 286.9541 |
| 7 | 346.3936 | 310.2731 | 298.7097 |
| 8 | 383.3769 | 346.4494 | 330.9857 |
| 9 | 410.4476 | 375.5321 | 395.4231 |
| 10 | 449.3516 | 412.6849 | 413.7500 |
| 11 | 474.2029 | 443.2558 | 397.7619 |
| 12 | 512.0769 | 443.9375 | 469.7778 |
| 13 | 523.8750 | 379.4000 | 268.0000 |
| 14 | 418.0000 | 654.0000 | 471.0000 |
| 15 | NA | NA | 299.0000 |
| 17 | NA | NA | 620.0000 |
| 21 | NA | 341.0000 | NA |
write.csv(tbl2,"ps_meanweight.csv")
Gillnet sample frequency and mean weight at age….
t <- tmp2 %>%
filter(gear == 5) %>%
filter(Age < 28)
tbl <- table(t$Age)
tbl2 <- aggregate(Weight ~ Age, data = t, mean)
t <- cbind(tbl,tbl2)
t <- t[,c("Age","Freq","Weight")]
kable(t)
| Age | Freq | Weight |
|---|---|---|
| 3 | 3 | 157.6667 |
| 4 | 77 | 237.5844 |
| 5 | 846 | 276.6418 |
| 6 | 891 | 295.5286 |
| 7 | 483 | 318.5911 |
| 8 | 378 | 356.2857 |
| 9 | 240 | 392.9625 |
| 10 | 192 | 430.2188 |
| 11 | 133 | 452.1278 |
| 12 | 51 | 483.2353 |
| 13 | 14 | 454.0000 |
| 14 | 6 | 466.1667 |
| 15 | 1 | 299.0000 |
| 17 | 1 | 620.0000 |
| 21 | 1 | 341.0000 |
write.csv(t,"gn_samples.csv")
The 2019 Biomass is:
equals:
For pre-peak harvest we apply all three pre-peak PS harvest groups (GN harvest is negligible). We apply PS group 3 to both surveys. A case could be made for making more purse seine groups but there is very little change over time in the age comp after the first couple of days plus there is a natural grouping of three periods in time in terms of the open areas of fishing.