NMFS Trawl Data

Keep only unique tows

# One row per tow: keep the first record for every distinct ID, then split ID
# after its 4th character into `year` and `ID2`. The original ID column is
# retained (remove = FALSE).
IDnoyear <- separate(
  distinct(trawl.data, ID, .keep_all = TRUE),
  col = ID,
  into = c("year", "ID2"),
  sep = 4,
  remove = FALSE
)
# Stacked bar chart: number of unique tows per EST_YEAR, filled by SEASON.
IDnoyear %>%
  count(EST_YEAR, SEASON) %>%
  ggplot(aes(x = EST_YEAR, y = n, fill = SEASON)) +
  geom_col() +
  theme(panel.grid = element_blank()) +
  labs(x = "Year", y = "", title = "Tows per Year")

# Stacked bar chart: number of unique tows per STRATUM, filled by SEASON.
# The x axis shows STRATUM, so it is labelled "Stratum" (it was previously
# labelled "Year", carried over from the per-year plot).
IDnoyear %>%
  count(STRATUM, SEASON) %>%
  ggplot(aes(x = STRATUM, y = n, fill = SEASON)) +
  geom_col() +
  theme(panel.grid = element_blank()) +
  labs(x = "Stratum", y = "", title = "Tows per Stratum")

# Tows whose ID2 occurs more than once. The per-ID2 row count `n` is joined
# back onto every tow record, records with n <= 1 are dropped, and the result
# is ordered from the most to the least frequently repeated ID2. Only the
# columns needed for the distance calculation are kept.
ID2wlatlong <- IDnoyear %>%
  count(ID2) %>%
  right_join(IDnoyear, by = "ID2") %>%
  filter(n > 1) %>%
  arrange(desc(n)) %>%
  select(ID2, n, EST_YEAR, DECDEG_BEGLON, DECDEG_BEGLAT, SEASON, STRATUM)

# Histogram of how many times each tow number (ID2) occurs.
# Each ID2 contributes exactly one observation (its count). Joining the counts
# back onto the tow records would repeat every ID2 `n` times and weight the
# histogram by the count itself, so the counts are plotted directly.
# Counts are integers, hence one bin per integer value (binwidth = 1) instead
# of the default 30 bins.
IDnoyear %>%
  count(ID2) %>%
  ggplot(aes(n)) +
  geom_histogram(binwidth = 1) +
  labs(x = "ID repeated", title = "Number of times each tow # occurs")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Distance (km) from the first record of every repeated ID2 to its tow in
# each year.
#
# Result: `ID2list` is a named list keyed by ID2. Each element is a numeric
# vector named by EST_YEAR (as character) holding the Haversine distance, in
# kilometres, between the first record of that ID2 and the first record of
# that ID2 in the given year. Because the first record belongs to the first
# year visited, the first entry of every vector is that record's distance to
# itself.
uniqueID2 <- as.vector(unique(ID2wlatlong$ID2))

ID2list <- list()
# NOTE(review): `meanlist` is never filled in the visible code; it is kept in
# case later code relies on it being defined.
meanlist <- list()

for (id in uniqueID2) {
  # All records for this ID2, filtered once and reused below.
  id_rows <- filter(ID2wlatlong, ID2 == id)

  # Reference position: the first record of this ID2.
  ref_xy <- cbind(
    as.numeric(id_rows$DECDEG_BEGLON[1]),
    as.numeric(id_rows$DECDEG_BEGLAT[1])
  )

  for (yr in unique(as.character(id_rows$EST_YEAR))) {
    yr_rows <- filter(id_rows, EST_YEAR == yr)
    yr_xy <- cbind(
      as.numeric(yr_rows$DECDEG_BEGLON),
      as.numeric(yr_rows$DECDEG_BEGLAT)
    )

    # distm() returns a 1 x k matrix (reference point vs. the k records of
    # this year). Only the first record of the year is used, which is what
    # diag() on that matrix amounted to; index it explicitly instead.
    dist_m <- distm(ref_xy, yr_xy, fun = distHaversine)

    # distHaversine() reports metres by default; store kilometres.
    ID2list[[id]][yr] <- as.numeric(dist_m[1, 1] / 1000)
  }
}

# Mean between-year distance per ID2, plotted as a histogram.
#
# The vectors in `ID2list` have different lengths (an ID2 is not towed in
# every year). Binding them directly with rbind() recycles the shorter vectors
# and misaligns the year columns, so each vector is first aligned to the full
# set of years and padded with NA.
#
# The first entry of every vector is the reference tow measured against
# itself (distance 0). It is not a distance between two different tows, so it
# is dropped before averaging; otherwise every mean would be pulled towards 0.
between_year <- lapply(ID2list, function(d) d[-1])
all_years <- sort(
  unique(unlist(lapply(between_year, names), use.names = FALSE))
)

# One row per ID2 (row names), one column per year, NA where the ID2 has no
# tow in that year.
flatIDs <- do.call(
  rbind,
  lapply(between_year, function(d) unname(d[all_years]))
)
colnames(flatIDs) <- all_years

flatIDs %>%
  as_tibble() %>%
  # Take the IDs from the matrix itself so rows and IDs cannot get out of step.
  mutate(ID2 = rownames(flatIDs)) %>%
  # Gather every year column, however many there are.
  gather(key = "EST_YEAR", value = "meandist", -ID2) %>%
  # Drop the NA padding (years in which this ID2 was not towed).
  filter(!is.na(meandist)) %>%
  group_by(ID2) %>%
  summarise(n = n(), meandistKM = mean(meandist)) %>%
  arrange(desc(n)) %>%
  ggplot(aes(meandistKM)) +
  geom_histogram() +
  labs(
    x = "Distance (km)",
    y = "",
    title = "Distance between tows of same ID in different years"
  )
## Warning: The `.name_repair` argument to `as_tibble()` takes precedence over
## the deprecated `validate` argument.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.