trawl.data<-survdat %>%
select(ID, EST_YEAR,SEASON, STRATUM, DECDEG_BEGLAT,DECDEG_BEGLON,
SVSPP, COMNAME, CATCHSEX,BIOMASS, AVGDEPTH, ABUNDANCE, LENGTH, NUMLEN) %>%
filter(STRATUM >= 01010 & STRATUM <= 01760) %>%
filter(STRATUM!=1310 & STRATUM!=1320 & STRATUM!=1330 & STRATUM!=1350 &
STRATUM!=1410 & STRATUM!=1420 & STRATUM!=1490) %>%
filter(SEASON == "SPRING" | SEASON == "FALL") %>%
filter(EST_YEAR >= 1970 & EST_YEAR < 2018) %>%
filter(!is.na(BIOMASS))
Generate template of all sampling occasions by selecting distinct cases by ID, EST_YEAR, SEASON, STRATUM, and AVGDEPTH.
template<-trawl.data %>%
distinct(ID, EST_YEAR, SEASON, STRATUM, AVGDEPTH,.keep_all = TRUE) %>%
select(ID, EST_YEAR, SEASON, STRATUM,DECDEG_BEGLAT,DECDEG_BEGLON,AVGDEPTH)
Create a dataframe to add common names by SVSPP code.
COMNAMEdata<-trawl.data %>%
select(SVSPP, COMNAME) %>%
distinct(SVSPP, .keep_all = TRUE)
Sum total biomass by species in each year.
biomass.spp.year<-trawl.data %>%
group_by(SVSPP, EST_YEAR) %>%
summarize(ANNUAL_SPECIES_BIOMASS = sum(BIOMASS))
biomass.spp<-trawl.data %>%
group_by(ID, SVSPP) %>%
summarize(SPECIES_BIOMASS = sum(BIOMASS)) %>%
right_join(template, by = "ID") %>%
left_join(biomass.spp.year, by = c("EST_YEAR", "SVSPP"))
Calculate total biomass per tow, not grouped by species. Will use this later to calculate proportion of species biomass per by tow.
biomass.tow<-trawl.data %>%
group_by(ID) %>%
summarize(TOW_BIOMASS = sum(BIOMASS)) %>%
right_join(template, by = "ID")
Calculate the number of species in each tow. This is a little indirect, but it does the job.
spp.tow<-trawl.data %>%
group_by(ID) %>%
count(SVSPP) %>%
mutate(count = n/n) %>%
group_by(ID) %>%
summarise(NSPECIES = sum(count))
Calculate the mean length of each fish.
*Join with the mean length dataframe for possible future analysis of fish lengths/sizes.
mean.lengths<-trawl.data %>%
group_by(SVSPP,ID) %>%
mutate(MEAN_LENGTH = weighted.mean(LENGTH, NUMLEN,na.rm = TRUE), n = sum(NUMLEN)) %>%
select(ID, SVSPP, MEAN_LENGTH, n) %>%
distinct(ID, SVSPP, .keep_all = TRUE)
length.spp<-trawl.data %>%
group_by(ID, SVSPP) %>%
nest(LENGTH:NUMLEN, key = "LENGTH") %>%
left_join(mean.lengths, by = c("ID", "SVSPP"))
Long data with biomass by species by tow, mean length, and tow metadata (sum biomass, avg depth, lat/long, etc).
clean_trawl<-biomass.spp %>%
left_join(COMNAMEdata, by = "SVSPP") %>%
select(ID, SVSPP, COMNAME, SPECIES_BIOMASS) %>%
left_join(mean.lengths, by = c("ID", "SVSPP")) %>%
left_join(biomass.tow, by = "ID")
Tidy dataset by tow with fully nested species data.
tidy_trawl<-mean.lengths %>%
left_join(biomass.spp, by = c("ID", "SVSPP")) %>%
left_join(COMNAMEdata, by = "SVSPP") %>%
left_join(biomass.tow, by = "ID") %>%
select(ID, SVSPP, COMNAME, SPECIES_BIOMASS, MEAN_LENGTH, n, TOW_BIOMASS) %>%
mutate(BIOMASS_PROP = SPECIES_BIOMASS/TOW_BIOMASS) %>%
select(-TOW_BIOMASS) %>%
group_by(ID) %>%
nest() %>%
left_join(biomass.tow, by = "ID") %>%
left_join(spp.tow, by = "ID")
Calculate center of species biomass for each year.
centerofbiomass<-biomass.spp %>%
filter(!is.na(SPECIES_BIOMASS),
!is.na(ANNUAL_SPECIES_BIOMASS)) %>%
mutate(weightedLAT = (SPECIES_BIOMASS/ANNUAL_SPECIES_BIOMASS)*DECDEG_BEGLAT) %>%
mutate(weightedLON = (SPECIES_BIOMASS/ANNUAL_SPECIES_BIOMASS)*DECDEG_BEGLON) %>%
group_by(SVSPP,EST_YEAR) %>%
summarise(CENTER_LAT = sum(weightedLAT), CENTER_LAT = sum(weightedLON)) %>%
filter(!is.na(CENTER_LAT),
!is.na(CENTER_LAT)) %>%
left_join(COMNAMEdata, by = "SVSPP")
Select only species that occur in all years of the time series.
center_fullTS<-centerofbiomass %>%
group_by(SVSPP, COMNAME) %>%
count() %>% #calculates number of years each spp occurs
filter(n == (max(trawl.data$EST_YEAR) - min(trawl.data$EST_YEAR) + 1)) %>% #filter missing yr species
select(SVSPP)
center_fullTS<-centerofbiomass %>%
semi_join(center_fullTS, by = "SVSPP")
Plot center of biomass.
loopbydf<-COMNAMEdata %>%
semi_join(center_fullTS, by = "SVSPP") %>%
arrange(SVSPP) %>%
mutate(loopby = seq(1,58,1))
plotlist<-list()
for(i in loopbydf$loopby){
tempdf<-center_fullTS %>%
filter(SVSPP == loopbydf$SVSPP[loopbydf$loopby == i]) %>%
mutate(smoothed = c(NA,NA,rollmean(CENTER_LAT,5),NA,NA))
plotlist[[i]]<-ggplot() + geom_point(data = tempdf,aes(x = EST_YEAR, y = CENTER_LAT)) + geom_line(data = tempdf,aes(x = EST_YEAR, y = CENTER_LAT), size = .5) + geom_line(data = tempdf,aes(x = EST_YEAR, y = smoothed), size = 0.9, color = "blue") + labs(x = "Year", y = "Center of latitude", title = paste('Species:', unique(tempdf$COMNAME))) + theme(panel.grid = element_blank())
}
#just plots the first 8 for example
list1 = plotlist[c(1:8)]
do.call(grid.arrange,c(list1, ncol = 4))
Bring in species traits data and pull out “functional groups.”
speciestraits<-read_csv(file = paste(shared.path, "Mills Lab/Projects/Pew_project/Metadata/species names and characteristics.csv", sep = "")) %>%
select(SVSPP, COMNAME, HARE_ET_AL_2016_Functional_Group) %>%
rename("functgroup" = "HARE_ET_AL_2016_Functional_Group") %>%
replace(.,is.na(.), "Unclassified")