# Release/recapture table; only rows with RdayInt below 20 are kept.
releaserecap <- paste0(shared.path, "/Salmon/Data/Growth/Data/releaserecaptdata.rds") %>%
  readRDS() %>%
  filter(RdayInt < 20)

# Full growth data set, read as stored.
growthdata <- readRDS(
  paste0(shared.path, "/Salmon/Data/Mark-recap/Carlin_growth_fulldata.rds")
)

# Release records, reduced to the tag key plus batch type and life stage.
releasedata <- paste0(shared.path, "/Salmon/Data/Mark-recap/Carlin_all_releases.rds") %>%
  readRDS() %>%
  dplyr::select(Batch_TagID, BatchType, LifeStage)

# Distant-recovery records: keep the batch/tag numbers plus the recovery-date
# and return-source fields, then build the Batch_TagID join key as
# "<BatchNo>_<TagNo>".
# NOTE(review): BatchNo and TagNo are parsed as doubles, so they are turned
# into text with R's default number formatting; a round value such as 100000
# would likely come out as "1e+05". Confirm the key still matches Batch_TagID
# in the other tables.
recapdata <- paste0(shared.path, "/Salmon/Data/Mark-recap/Carlin_distantrecoveries.csv") %>%
  read_csv() %>%
  dplyr::select(BatchNo, TagNo, DateEstimated, RecDateType, ReturnSource) %>%
  mutate(Batch_TagID = paste0(BatchNo, "_", TagNo))
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   BatchNo = col_double(),
##   TagNo = col_double(),
##   Lat = col_double(),
##   Long = col_double(),
##   RecaptureMonth = col_double(),
##   RecaptureDay = col_double(),
##   RecaptureYear = col_double(),
##   Length1_cm = col_double(),
##   Length2_cm = col_double(),
##   Weight_kg = col_double(),
##   ScaleSamp = col_logical()
## )
## See spec(...) for full column specifications.

Visualize annulus presence by day at sea.

# Stacked bars showing, for each 20-day bin of days at sea, the share of
# records carrying 0, 1 or 2 annuli.
releaserecap %>%
  dplyr::select(Batch_TagID, RecaptureMonth, DaysatSea) %>%
  left_join(growthdata, by = "Batch_TagID") %>%
  dplyr::select(
    Batch_TagID, RecaptureMonthNum, marine.circ, FMC, SeaAge, M1, M2, DaysatSea
  ) %>%
  # Flag columns: the annulus count where the condition holds, NA otherwise.
  mutate(
    noannuli = ifelse(SeaAge == 0 & is.na(M1), 0, NA),
    firstannulus = ifelse(!is.na(M1) & is.na(M2), 1, NA),
    secondannulus = ifelse(!is.na(M2), 2, NA)
  ) %>%
  # Stack the three flag columns by name instead of by position (9:11), so the
  # reshape does not silently break if the select() above changes.
  gather(
    key = "annulustype", value = "numannuli",
    noannuli, firstannulus, secondannulus
  ) %>%
  filter(!is.na(numannuli)) %>%
  # Lower edge of the 20-day bin each record falls in.
  mutate(SeaDayGroup = 20 * (DaysatSea %/% 20)) %>%
  dplyr::group_by(SeaDayGroup, numannuli) %>%
  dplyr::summarise(n = n()) %>%
  # The result is still grouped by SeaDayGroup, so sum(n) is the bin total.
  mutate(relfreq = n / sum(n)) %>%
  filter(!is.na(SeaDayGroup)) %>%
  ggplot(aes(SeaDayGroup, relfreq, fill = as.factor(numannuli))) +
  geom_bar(position = "stack", stat = "identity") +
  # The x axis is the days-at-sea bin, not the recapture month.
  labs(x = "Number of Days at Sea", y = "Relative Frequency", fill = "Number of Annuli")

# Release and recapture dates per tag; joined onto the flagged records below.
releaseonly <- releaserecap %>%
  dplyr::select(Batch_TagID, RecaptureDate, ReleaseDate, RdayInt)

# Long table with one row per record and annulus flag that applies to it
# (annulustype names the flag, numannuli holds 0, 1 or 2).
longannuli <- releaserecap %>%
  dplyr::select(Batch_TagID, RecaptureMonth, DaysatSea) %>%
  left_join(growthdata, by = "Batch_TagID") %>%
  dplyr::select(
    Batch_TagID, RecaptureMonthNum, marine.circ, FMC, SeaAge, M1, M2, DaysatSea
  ) %>%
  # Flag columns: the annulus count where the condition holds, NA otherwise.
  mutate(
    noannuli = ifelse(SeaAge == 0 & is.na(M1), 0, NA),
    firstannulus = ifelse(!is.na(M1) & is.na(M2), 1, NA),
    secondannulus = ifelse(!is.na(M2), 2, NA)
  ) %>%
  # Columns 9 to 11 are exactly the three flags created above.
  gather(
    key = "annulustype", value = "numannuli",
    noannuli, firstannulus, secondannulus
  ) %>%
  filter(!is.na(numannuli))

Mistake 1: 1 annulus after too few days at sea

# Records flagged with a single annulus after fewer than 200 days at sea,
# annotated with release/recapture dates, batch details and recovery details.
mistakeFA <- longannuli %>%
  filter(annulustype == "firstannulus", DaysatSea < 200) %>%
  mutate(mistake = "1 annulus after too few days at sea") %>%
  left_join(releaseonly, by = "Batch_TagID") %>%
  left_join(releasedata, by = "Batch_TagID") %>%
  left_join(recapdata, by = "Batch_TagID")
mistakeFA
## # A tibble: 3 x 21
##   Batch_TagID RecaptureMonthN~ marine.circ   FMC SeaAge    M1    M2
##   <chr>                  <dbl>       <dbl> <dbl>  <int> <dbl> <dbl>
## 1 6811_6931                  7          41 0.965      0  2.40    NA
## 2 7005_44155                 7          30 1.37       1  1.94    NA
## 3 8223_119600                7          41 0.622      1  2.18    NA
## # ... with 14 more variables: DaysatSea <dbl>, annulustype <chr>,
## #   numannuli <dbl>, mistake <chr>, RecaptureDate <date>,
## #   ReleaseDate <date>, RdayInt <dbl>, BatchType <fct>, LifeStage <fct>,
## #   BatchNo <dbl>, TagNo <dbl>, DateEstimated <chr>, RecDateType <chr>,
## #   ReturnSource <chr>

Mistake 2: no annulus, but at sea more than 250 days

# Records flagged with no annulus despite more than 250 days at sea,
# annotated with release/recapture dates, batch details and recovery details.
# The stored label now states the threshold the filter actually applies and
# spells "sea" correctly (it previously read "at see more than a year").
mistakeNA <- longannuli %>%
  filter(annulustype == "noannuli", DaysatSea > 250) %>%
  mutate(mistake = "no annulus, but at sea more than 250 days") %>%
  left_join(releaseonly, by = "Batch_TagID") %>%
  left_join(releasedata, by = "Batch_TagID") %>%
  left_join(recapdata, by = "Batch_TagID")
mistakeNA
## # A tibble: 2 x 21
##   Batch_TagID RecaptureMonthN~ marine.circ   FMC SeaAge    M1    M2
##   <chr>                  <dbl>       <dbl> <dbl>  <int> <dbl> <dbl>
## 1 6818_2201                 11           5 0.940      0    NA    NA
## 2 6801_5873                  8          13 1.51       0    NA    NA
## # ... with 14 more variables: DaysatSea <dbl>, annulustype <chr>,
## #   numannuli <dbl>, mistake <chr>, RecaptureDate <date>,
## #   ReleaseDate <date>, RdayInt <dbl>, BatchType <fct>, LifeStage <fct>,
## #   BatchNo <dbl>, TagNo <dbl>, DateEstimated <chr>, RecDateType <chr>,
## #   ReturnSource <chr>

Mistake 3: 2 marine annuli in less than 580 days

# Records flagged with two marine annuli after fewer than 580 days at sea,
# annotated with release/recapture dates, batch details and recovery details.
# The stored label now quotes the 580-day threshold the filter applies (it
# previously said 700 days).
mistakeSA <- longannuli %>%
  filter(annulustype == "secondannulus", DaysatSea < 580) %>%
  mutate(mistake = "2 marine annuli in less than 580 days") %>%
  left_join(releaseonly, by = "Batch_TagID") %>%
  left_join(releasedata, by = "Batch_TagID") %>%
  left_join(recapdata, by = "Batch_TagID")
mistakeSA
## # A tibble: 7 x 21
##   Batch_TagID RecaptureMonthN~ marine.circ   FMC SeaAge    M1    M2
##   <chr>                  <dbl>       <dbl> <dbl>  <int> <dbl> <dbl>
## 1 7624_4494                  9          76 1.16       2  2.87  4.87
## 2 6909_9366                 10          73 0.922      2  3.12  4.36
## 3 8403_55278                 8          81 0.607      1  2.38  3.74
## 4 7301_66686                 7          77 0.940      1  2.46  3.83
## 5 7504_89317                 7          43 0.972      1  2.73  3.30
## 6 7905_152340                8          59 0.612      1  2.02  3.34
## 7 8101_285462                7          48 0.866      1  2.18  2.98
## # ... with 14 more variables: DaysatSea <dbl>, annulustype <chr>,
## #   numannuli <dbl>, mistake <chr>, RecaptureDate <date>,
## #   ReleaseDate <date>, RdayInt <dbl>, BatchType <fct>, LifeStage <fct>,
## #   BatchNo <dbl>, TagNo <dbl>, DateEstimated <chr>, RecDateType <chr>,
## #   ReturnSource <chr>

Mistake 4: at sea more than 730 days with only 1 annulus

# Records flagged with a single annulus after more than 730 days at sea,
# annotated with release/recapture dates, batch details and recovery details.
mistakeFA2 <- longannuli %>%
  filter(annulustype == "firstannulus", DaysatSea > 730) %>%
  mutate(mistake = "at sea more than 730 days with only 1 annulus") %>%
  left_join(releaseonly, by = "Batch_TagID") %>%
  left_join(releasedata, by = "Batch_TagID") %>%
  left_join(recapdata, by = "Batch_TagID")
mistakeFA2
## # A tibble: 26 x 21
##    Batch_TagID RecaptureMonthN~ marine.circ   FMC SeaAge    M1    M2
##    <chr>                  <dbl>       <dbl> <dbl>  <int> <dbl> <dbl>
##  1 6907_5314                 10          51 1.10       1  2.87    NA
##  2 7201_6446                  9          53 1.39       1  3.12    NA
##  3 6816_7138                  6          55 1.60       2  2.62    NA
##  4 7201_8910                  6          40 0.870      1  2.57    NA
##  5 7003_20310                 9          64 0.636      1  3.51    NA
##  6 7203_31365                10          54 1.45       1  3.26    NA
##  7 7204_53511                 9          63 1.08       1  3.45    NA
##  8 7204_56068                 9          42 0.868      1  2.15    NA
##  9 7301_61966                 5          53 1.19       0  2.47    NA
## 10 7101_74901                 8          66 1.48       1  3.13    NA
## # ... with 16 more rows, and 14 more variables: DaysatSea <dbl>,
## #   annulustype <chr>, numannuli <dbl>, mistake <chr>,
## #   RecaptureDate <date>, ReleaseDate <date>, RdayInt <dbl>,
## #   BatchType <fct>, LifeStage <fct>, BatchNo <dbl>, TagNo <dbl>,
## #   DateEstimated <chr>, RecDateType <chr>, ReturnSource <chr>

Mistake 5: at sea more than 1000 days with only 2 marine annuli

# Records flagged with two marine annuli after more than 1000 days at sea,
# annotated with release/recapture dates, batch details and recovery details.
mistakeSA2 <- longannuli %>%
  filter(annulustype == "secondannulus", DaysatSea > 1000) %>%
  mutate(mistake = "at sea more than 1000 days with only 2 marine annuli") %>%
  left_join(releaseonly, by = "Batch_TagID") %>%
  left_join(releasedata, by = "Batch_TagID") %>%
  left_join(recapdata, by = "Batch_TagID") %>%
  # The same tag can come back several times from these joins (repeated rows
  # for a Batch_TagID fan out); drop rows that are identical in every column.
  # NOTE(review): rows that differ in any joined column are kept, so check
  # which input table holds repeated tags.
  distinct()
mistakeSA2
## # A tibble: 8 x 21
##   Batch_TagID RecaptureMonthN~ marine.circ   FMC SeaAge    M1    M2
##   <chr>                  <dbl>       <dbl> <dbl>  <int> <dbl> <dbl>
## 1 7601_2885                  7          63 1.27       2  2.74  4.49
## 2 6908_9727                  6          75 0.955      2  2.73  4.26
## 3 7303_77606                 6          66 1.56       2  3.09  5.15
## 4 7903_183355                8          66 1.32       2  3.04  4.13
## 5 8604_381027                7          66 1.20       2  2.57  4.00
## 6 8604_381027                7          66 1.20       2  2.57  4.00
## 7 8604_381027                7          66 1.20       2  2.57  4.00
## 8 8604_381027                7          66 1.20       2  2.57  4.00
## # ... with 14 more variables: DaysatSea <dbl>, annulustype <chr>,
## #   numannuli <dbl>, mistake <chr>, RecaptureDate <date>,
## #   ReleaseDate <date>, RdayInt <dbl>, BatchType <fct>, LifeStage <fct>,
## #   BatchNo <dbl>, TagNo <dbl>, DateEstimated <chr>, RecDateType <chr>,
## #   ReturnSource <chr>
# Stack the five flagged subsets into one table of suspect records.
mistakes <- rbind(
  mistakeFA,
  mistakeNA,
  mistakeSA,
  mistakeFA2,
  mistakeSA2
)

Saved using: write_csv(mistakes, "mistakendates.csv")

Dataset without mistaken annuli scales

# Number of records per 20-day bin of days at sea, split by annulus count,
# after dropping every tag listed in `mistakes`.
longannuli %>%
  anti_join(mistakes, by = "Batch_TagID") %>%
  # Lower edge of the 20-day bin each record falls in.
  mutate(SeaDayGroup = 20 * (DaysatSea %/% 20)) %>%
  dplyr::group_by(SeaDayGroup, numannuli) %>%
  dplyr::summarise(n = n()) %>%
  filter(!is.na(SeaDayGroup)) %>%
  ggplot(aes(SeaDayGroup, n, fill = as.factor(numannuli))) +
  # The counts are already computed, so draw them directly with geom_col()
  # instead of geom_histogram() with its binning stat overridden.
  geom_col() +
  labs(x = "Number of Days at Sea", y = "Number of Individuals", fill = "Number of Annuli")

Same relative-frequency plot as before, but with the impossible annuli scales removed.

# Stacked bars showing, for each 20-day bin of days at sea, the share of
# records carrying 0, 1 or 2 annuli, after dropping every tag listed in
# `mistakes`.
longannuli %>%
  anti_join(mistakes, by = "Batch_TagID") %>%
  # Lower edge of the 20-day bin each record falls in.
  mutate(SeaDayGroup = 20 * (DaysatSea %/% 20)) %>%
  dplyr::group_by(SeaDayGroup, numannuli) %>%
  dplyr::summarise(n = n()) %>%
  # The result is still grouped by SeaDayGroup, so sum(n) is the bin total.
  mutate(relfreq = n / sum(n)) %>%
  filter(!is.na(SeaDayGroup)) %>%
  ggplot(aes(SeaDayGroup, relfreq, fill = as.factor(numannuli))) +
  geom_bar(position = "stack", stat = "identity") +
  # The x axis is the days-at-sea bin, not the recapture month.
  labs(x = "Number of Days at Sea", y = "Relative Frequency", fill = "Number of Annuli")

If this was a linear process…

# Scatter of marine.circ against DaysatSea for the records not listed in
# `mistakes`, overlaid with a fitted straight line.
ggplot(
  data = anti_join(longannuli, mistakes, by = "Batch_TagID"),
  mapping = aes(x = DaysatSea, y = marine.circ)
) +
  geom_point() +
  geom_smooth(method = "lm")

# Annulus records with every tag listed in `mistakes` removed.
goodannuli <- anti_join(longannuli, mistakes, by = "Batch_TagID")

# Linear regression of marine.circ on DaysatSea over the cleaned records.
summary(
  lm(formula = marine.circ ~ DaysatSea, data = goodannuli)
)
## 
## Call:
## lm(formula = marine.circ ~ DaysatSea, data = goodannuli)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -21.1632  -3.4937  -0.1495   3.2226  30.3667 
## 
## Coefficients:
##              Estimate Std. Error t value            Pr(>|t|)    
## (Intercept) 12.172988   0.757465   16.07 <0.0000000000000002 ***
## DaysatSea    0.065116   0.001561   41.72 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.665 on 1415 degrees of freedom
## Multiple R-squared:  0.5516, Adjusted R-squared:  0.5513 
## F-statistic:  1741 on 1 and 1415 DF,  p-value: < 0.00000000000000022
# Classify each record by days at sea: up to 250 days is "as0SW", above 250
# and up to 750 is "as1SW", above 750 is "as2SW". cut() uses right-closed
# intervals, so values of exactly 250 or 750 are assigned to a group instead
# of being left NA as the strict </> comparisons did. A missing DaysatSea
# still yields NA.
goodannuli$DaysGroup <- as.character(cut(
  goodannuli$DaysatSea,
  breaks = c(-Inf, 250, 750, Inf),
  labels = c("as0SW", "as1SW", "as2SW")
))

# Distribution of days at sea per unit of marine.circ across the cleaned records.
ggplot(
  data = mutate(goodannuli, dayspercirc = DaysatSea / marine.circ),
  mapping = aes(x = dayspercirc)
) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Per days-at-sea group: mean days per unit of marine.circ, group size, and
# the mean and standard deviation of days at sea.
goodannuli %>%
  group_by(DaysGroup) %>%
  summarise(
    meancirday = mean(DaysatSea / marine.circ),
    n = n(),
    meandaysatsea = mean(DaysatSea),
    sddays = sd(DaysatSea)
  )
## # A tibble: 3 x 5
##   DaysGroup meancirday     n meandaysatsea sddays
##   <chr>          <dbl> <int>         <dbl>  <dbl>
## 1 as0SW           9.43    37          84.7   27.7
## 2 as1SW          11.1   1333         475.    39.9
## 3 as2SW          13.8     47         812.    32.5