NMFS Trawl Data Cleaning

Select columns of interest
Select stratum for full offshore survey
Eliminate strata not sampled for full time span
Select fall and spring surveys only
Select years 1970-2017
Remove biomass NAs

# Build the cleaned catch table from the raw survey records in `survdat`.
trawl.data <- survdat %>%
  # Keep only the columns used by the later steps.
  select(
    ID, EST_YEAR, SEASON, STRATUM, DECDEG_BEGLAT, DECDEG_BEGLON,
    SVSPP, COMNAME, CATCHSEX, BIOMASS, AVGDEPTH, ABUNDANCE, LENGTH, NUMLEN
  ) %>%
  filter(
    # Strata 1010-1760, minus the strata excluded by the list below.
    STRATUM >= 1010, STRATUM <= 1760,
    !STRATUM %in% c(1310, 1320, 1330, 1350, 1410, 1420, 1490),
    # Spring and fall surveys only.
    SEASON %in% c("SPRING", "FALL"),
    # Years 1970 through 2017 inclusive.
    EST_YEAR >= 1970, EST_YEAR < 2018,
    # Drop records without a biomass value.
    !is.na(BIOMASS)
  )

Generate template of all sampling occasions by selecting distinct cases by ID, EST_YEAR, SEASON, STRATUM, and AVGDEPTH.

# One row of tow metadata per distinct (ID, EST_YEAR, SEASON, STRATUM,
# AVGDEPTH) combination; the first latitude/longitude seen for each
# combination is kept.
template <- trawl.data %>%
  select(
    ID, EST_YEAR, SEASON, STRATUM,
    DECDEG_BEGLAT, DECDEG_BEGLON, AVGDEPTH
  ) %>%
  distinct(ID, EST_YEAR, SEASON, STRATUM, AVGDEPTH, .keep_all = TRUE)

Create a dataframe to add common names by SVSPP code.

# Lookup table from species code (SVSPP) to common name; keeps the first
# COMNAME encountered for each code.
COMNAMEdata <- distinct(
  select(trawl.data, SVSPP, COMNAME),
  SVSPP,
  .keep_all = TRUE
)

Sum total biomass by species in each year.

# Total BIOMASS per species (SVSPP) per year (EST_YEAR).
# NOTE(review): trawl.data still carries LENGTH/NUMLEN; if the source has one
# row per length class with BIOMASS repeated on each, this sum counts the same
# catch more than once -- confirm against the survdat layout.
biomass.spp.year <- summarise(
  group_by(trawl.data, SVSPP, EST_YEAR),
  ANNUAL_SPECIES_BIOMASS = sum(BIOMASS)
)

Sum total biomass by species in each tow.
Add “template” metadata for each tow.
Add annual species biomass column.
Will use this dataframe to calculate center of biomass later on

# Species biomass per tow, with tow metadata and the species' annual total
# attached. The right join keeps every row of `template`, so tows without a
# matching summary row still appear.
biomass.spp <- summarise(
  group_by(trawl.data, ID, SVSPP),
  SPECIES_BIOMASS = sum(BIOMASS)
) %>%
  right_join(template, by = "ID") %>%
  left_join(biomass.spp.year, by = c("EST_YEAR", "SVSPP"))

Calculate total biomass per tow, not grouped by species. Will use this later to calculate the proportion of each species' biomass in each tow.

# Total BIOMASS per tow (all species combined), joined onto the tow metadata
# so that every row of `template` is retained.
biomass.tow <- right_join(
  summarise(group_by(trawl.data, ID), TOW_BIOMASS = sum(BIOMASS)),
  template,
  by = "ID"
)

Calculate the number of species in each tow. This is a little indirect, but it does the job.

# Number of distinct species codes (SVSPP) recorded in each tow (ID).
# n_distinct() counts the unique codes directly, replacing the earlier
# count() / n/n / sum() chain; NSPECIES is now an integer column.
spp.tow <- trawl.data %>%
  group_by(ID) %>%
  summarise(NSPECIES = n_distinct(SVSPP))

Calculate the mean length of each species in each tow, weighted by the number of fish measured at each length.

Nest all length measurements by species in a “data” column.

Join with the mean length dataframe for possible future analysis of fish lengths/sizes.

# Per species and tow: mean LENGTH weighted by NUMLEN, and the total NUMLEN
# (`n`). The values are computed on every row of the group and then reduced
# to one row per (ID, SVSPP).
# na.rm = TRUE only drops missing LENGTH values; a missing NUMLEN still
# propagates to NA in both MEAN_LENGTH and n.
mean.lengths <- trawl.data %>%
  group_by(SVSPP, ID) %>%
  mutate(MEAN_LENGTH = weighted.mean(LENGTH, NUMLEN, na.rm = TRUE)) %>%
  mutate(n = sum(NUMLEN)) %>%
  select(ID, SVSPP, MEAN_LENGTH, n) %>%
  distinct(ID, SVSPP, .keep_all = TRUE)

# One row per tow and species with the raw length measurements (LENGTH,
# NUMLEN) nested in a list-column named `data`, plus the mean-length summary.
# The columns are reduced to the keys and the nested columns first, so the
# nesting is by (ID, SVSPP) only. nest() has no `key` argument, so the nested
# columns are named explicitly with `data = c(...)`.
length.spp <- trawl.data %>%
  select(ID, SVSPP, LENGTH, NUMLEN) %>%
  nest(data = c(LENGTH, NUMLEN)) %>%
  left_join(mean.lengths, by = c("ID", "SVSPP"))

Create “clean dataset” in a long form by tow ID and species

Long data with biomass by species by tow, mean length, and tow metadata (sum biomass, avg depth, lat/long, etc).

# Long-form table: one row per tow and species with the common name, species
# biomass, mean length, and the tow-level totals/metadata from biomass.tow.
clean_trawl <- select(
  left_join(biomass.spp, COMNAMEdata, by = "SVSPP"),
  ID, SVSPP, COMNAME, SPECIES_BIOMASS
) %>%
  left_join(mean.lengths, by = c("ID", "SVSPP")) %>%
  left_join(biomass.tow, by = "ID")

Create “tidy dataset” where all species level information is nested

Tidy dataset by tow with fully nested species data.

# Tow-level table in which all species-level fields are nested per tow.
tidy_trawl <- mean.lengths %>%
  left_join(biomass.spp, by = c("ID", "SVSPP")) %>%
  left_join(COMNAMEdata, by = "SVSPP") %>%
  left_join(biomass.tow, by = "ID") %>%
  # Share of the tow's total biomass contributed by each species.
  mutate(BIOMASS_PROP = SPECIES_BIOMASS / TOW_BIOMASS) %>%
  # Keep only the species-level fields; TOW_BIOMASS is re-attached per tow
  # after nesting.
  select(
    ID, SVSPP, COMNAME, SPECIES_BIOMASS, MEAN_LENGTH, n, BIOMASS_PROP
  ) %>%
  group_by(ID) %>%
  nest() %>%
  left_join(biomass.tow, by = "ID") %>%
  left_join(spp.tow, by = "ID")

Center of biomass

Calculate center of species biomass for each year.

# Biomass-weighted centre of each species' distribution in each year.
# Each tow's coordinates are weighted by that tow's share of the species'
# annual biomass, and the weighted coordinates are summed per species-year.
centerofbiomass <- biomass.spp %>%
  filter(
    !is.na(SPECIES_BIOMASS),
    !is.na(ANNUAL_SPECIES_BIOMASS)
  ) %>%
  mutate(
    weightedLAT = (SPECIES_BIOMASS / ANNUAL_SPECIES_BIOMASS) * DECDEG_BEGLAT,
    weightedLON = (SPECIES_BIOMASS / ANNUAL_SPECIES_BIOMASS) * DECDEG_BEGLON
  ) %>%
  group_by(SVSPP, EST_YEAR) %>%
  # The longitude centre must have its own column: giving both summaries the
  # name CENTER_LAT leaves no longitude column and overwrites the latitude.
  summarise(
    CENTER_LAT = sum(weightedLAT),
    CENTER_LON = sum(weightedLON)
  ) %>%
  filter(
    !is.na(CENTER_LAT),
    !is.na(CENTER_LON)
  ) %>%
  left_join(COMNAMEdata, by = "SVSPP")

Select only species that occur in all years of the time series.

# Keep only the species whose number of centre-of-biomass rows equals the
# number of calendar years spanned by trawl.data (last year - first year + 1).
center_fullTS <- semi_join(
  centerofbiomass,
  centerofbiomass %>%
    group_by(SVSPP, COMNAME) %>%
    count() %>% # rows (years) available for each species
    filter(n == diff(range(trawl.data$EST_YEAR)) + 1) %>%
    select(SVSPP),
  by = "SVSPP"
)

Plot center of biomass.

# Species retained in center_fullTS, ordered by species code, with a running
# index (`loopby`) used to iterate over them when plotting.
loopbydf <- COMNAMEdata %>%
  semi_join(center_fullTS, by = "SVSPP") %>%
  arrange(SVSPP)

# Derive the index from the actual number of species rather than assuming a
# fixed count of 58, which fails as soon as the filtered species set changes.
loopbydf$loopby <- seq_len(nrow(loopbydf))

# One plot per species in loopbydf, stored at the species' loopby index.
plotlist <- list()

for (i in loopbydf$loopby) {
  # Rows for the i-th species, plus a 5-point rolling mean of CENTER_LAT.
  # rollmean() returns four fewer values than its input, so two NAs are
  # padded on each end to restore the original length.
  tempdf <- center_fullTS %>%
    filter(SVSPP == loopbydf$SVSPP[loopbydf$loopby == i]) %>%
    mutate(smoothed = c(NA, NA, rollmean(CENTER_LAT, 5), NA, NA))

  # Points and thin line: annual values; thicker blue line: rolling mean.
  plotlist[[i]] <- ggplot() +
    geom_point(data = tempdf, aes(x = EST_YEAR, y = CENTER_LAT)) +
    geom_line(data = tempdf, aes(x = EST_YEAR, y = CENTER_LAT), size = .5) +
    geom_line(
      data = tempdf,
      aes(x = EST_YEAR, y = smoothed),
      size = 0.9,
      color = "blue"
    ) +
    labs(
      x = "Year",
      y = "Center of latitude",
      title = paste('Species:', unique(tempdf$COMNAME))
    ) +
    theme(panel.grid = element_blank())
}

# Example output: lay out only the first eight species plots, four per row.
list1 <- plotlist[1:8]
do.call(grid.arrange, c(list1, list(ncol = 4)))

Bring in species traits data and pull out “functional groups.”

# Species traits table reduced to species code, common name, and the
# Hare et al. 2016 functional group (renamed to `functgroup`).
speciestraits <- read_csv(
  file = paste0(
    shared.path,
    "Mills Lab/Projects/Pew_project/Metadata/species names and characteristics.csv"
  )
) %>%
  select(
    SVSPP,
    COMNAME,
    functgroup = HARE_ET_AL_2016_Functional_Group
  ) %>%
  # Every missing value in the three retained columns (not only functgroup)
  # is replaced with "Unclassified".
  replace(., is.na(.), "Unclassified")

NMFS Trawl Data Cleaning

LGCarlson

6/24/2019